v3dv: implement timestamp queries

Timestamp queries are handled on the CPU: vkCmdWriteTimestamp records a CPU
job that, at submit time, waits for the queue to become idle and stores a
CLOCK_MONOTONIC reading in the query.

Review notes:
 * get_query_result() and its per-type helpers report failures through a
   VkResult and return the value through an out parameter, so
   VK_ERROR_DEVICE_LOST reaches the caller instead of being written out as a
   query value with an uninitialized availability flag.
 * handle_timestamp_query_cpu_job() propagates a v3dv_QueueWaitIdle() failure.
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of the patch; the "index" lines of v3dv_query.c and v3dv_queue.c are
   carried over unchanged and no longer match the post-image.

diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index 61a3b5c1406..2081207c8c7 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -5021,7 +5021,30 @@ v3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
                        VkQueryPool queryPool,
                        uint32_t query)
 {
-   unreachable("Timestamp queries are not supported.");
+   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+   V3DV_FROM_HANDLE(v3dv_query_pool, query_pool, queryPool);
+
+   /* If this is called inside a render pass we need to finish the current
+    * job here...
+    */
+   if (cmd_buffer->state.pass)
+      v3dv_cmd_buffer_finish_job(cmd_buffer);
+
+   struct v3dv_job *job =
+      v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
+                                     V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
+                                     cmd_buffer, -1);
+   v3dv_return_if_oom(cmd_buffer, NULL);
+
+   job->cpu.query_timestamp.pool = query_pool;
+   job->cpu.query_timestamp.query = query;
+
+   list_addtail(&job->list_link, &cmd_buffer->jobs);
+   cmd_buffer->state.job = NULL;
+
+   /* ...and resume the subpass after the timestamp */
+   if (cmd_buffer->state.pass)
+      v3dv_cmd_buffer_subpass_resume(cmd_buffer, cmd_buffer->state.subpass_idx);
 }
 
 static void
diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
index 55755f5ef84..a5f002be78a 100644
--- a/src/broadcom/vulkan/v3dv_device.c
+++ b/src/broadcom/vulkan/v3dv_device.c
@@ -814,6 +814,11 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
    const VkSampleCountFlags supported_sample_counts =
       VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
 
+   struct timespec clock_res;
+   clock_getres(CLOCK_MONOTONIC, &clock_res);
+   const float timestamp_period =
+      clock_res.tv_sec * 1000000000.0f + clock_res.tv_nsec;
+
    /* FIXME: this will probably require an in-depth review */
    VkPhysicalDeviceLimits limits = {
       .maxImageDimension1D = 4096,
@@ -923,8 +928,8 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
       .sampledImageStencilSampleCounts = supported_sample_counts,
       .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
       .maxSampleMaskWords = 1,
-      .timestampComputeAndGraphics = false,
-      .timestampPeriod = 0.0f,
+      .timestampComputeAndGraphics = true,
+      .timestampPeriod = timestamp_period,
       .maxClipDistances = 8,
       .maxCullDistances = 0,
       .maxCombinedClipAndCullDistances = 8,
@@ -990,7 +995,7 @@ v3dv_queue_family_properties = {
                  VK_QUEUE_COMPUTE_BIT |
                  VK_QUEUE_TRANSFER_BIT,
    .queueCount = 1,
-   .timestampValidBits = 0, /* FIXME */
+   .timestampValidBits = 64,
    .minImageTransferGranularity = { 1, 1, 1 },
 };
 
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index 0a916cb9169..2017941e023 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -744,6 +744,7 @@ enum v3dv_job_type {
    V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS,
    V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
    V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
+   V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
 };
 
 struct v3dv_reset_query_cpu_job_info {
@@ -810,6 +811,11 @@ struct v3dv_csd_indirect_cpu_job_info {
    bool needs_wg_uniform_rewrite;
 };
 
+struct v3dv_timestamp_query_cpu_job_info {
+   struct v3dv_query_pool *pool;
+   uint32_t query;
+};
+
 struct v3dv_job {
    struct list_head list_link;
 
@@ -881,6 +887,7 @@ struct v3dv_job {
       struct v3dv_clear_attachments_cpu_job_info clear_attachments;
       struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
       struct v3dv_csd_indirect_cpu_job_info csd_indirect;
+      struct v3dv_timestamp_query_cpu_job_info query_timestamp;
    } cpu;
 
    /* Job specs for TFU jobs */
@@ -1084,10 +1091,14 @@ struct v3dv_resource {
 
 struct v3dv_query {
    bool maybe_available;
-   struct v3dv_bo *bo;
+   union {
+      struct v3dv_bo *bo; /* Used by GPU queries (occlusion) */
+      uint64_t value;     /* Used by CPU queries (timestamp) */
+   };
 };
 
 struct v3dv_query_pool {
+   VkQueryType query_type;
    uint32_t query_count;
    struct v3dv_query *queries;
 };
diff --git a/src/broadcom/vulkan/v3dv_query.c b/src/broadcom/vulkan/v3dv_query.c
index 7c2ce104365..7224de47301 100644
--- a/src/broadcom/vulkan/v3dv_query.c
+++ b/src/broadcom/vulkan/v3dv_query.c
@@ -31,12 +31,12 @@ v3dv_CreateQueryPool(VkDevice _device,
 {
    V3DV_FROM_HANDLE(v3dv_device, device, _device);
 
-   assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION);
+   assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION ||
+          pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP);
    assert(pCreateInfo->queryCount > 0);
 
-
    /* FIXME: the hw allows us to allocate up to 16 queries in a single block
-    * so we should try to use that.
+    * for occlusion queries so we should try to use that.
     */
    struct v3dv_query_pool *pool =
       vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
@@ -44,6 +44,7 @@ v3dv_CreateQueryPool(VkDevice _device,
    if (pool == NULL)
       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
 
+   pool->query_type = pCreateInfo->queryType;
    pool->query_count = pCreateInfo->queryCount;
 
    VkResult result;
@@ -59,16 +60,24 @@ v3dv_CreateQueryPool(VkDevice _device,
    uint32_t i;
    for (i = 0; i < pool->query_count; i++) {
       pool->queries[i].maybe_available = false;
-      pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true);
-      if (!pool->queries[i].bo) {
-         result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
-         goto fail_alloc_bo;
-      }
-
-      /* For occlusion queries we only need a 4-byte counter */
-      if (!v3dv_bo_map(device, pool->queries[i].bo, 4)) {
-         result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
-         goto fail_alloc_bo;
+      switch (pool->query_type) {
+      case VK_QUERY_TYPE_OCCLUSION:
+         pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true);
+         if (!pool->queries[i].bo) {
+            result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+            goto fail_alloc_bo;
+         }
+         /* For occlusion queries we only need a 4-byte counter */
+         if (!v3dv_bo_map(device, pool->queries[i].bo, 4)) {
+            result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+            goto fail_alloc_bo;
+         }
+         break;
+      case VK_QUERY_TYPE_TIMESTAMP:
+         pool->queries[i].value = 0;
+         break;
+      default:
+         unreachable("Unsupported query type");
       }
    }
 
@@ -98,21 +107,125 @@ v3dv_DestroyQueryPool(VkDevice _device,
    if (!pool)
       return;
 
-   for (uint32_t i = 0; i < pool->query_count; i++)
-      v3dv_bo_free(device, pool->queries[i].bo);
+   if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
+      for (uint32_t i = 0; i < pool->query_count; i++)
+         v3dv_bo_free(device, pool->queries[i].bo);
+   }
+
    vk_free2(&device->alloc, pAllocator, pool->queries);
    vk_free2(&device->alloc, pAllocator, pool);
 }
 
 static void
-write_query_result(void *dst, uint32_t idx, bool do_64bit, uint32_t value)
+write_query_result(void *dst, uint32_t idx, bool do_64bit, uint64_t value)
 {
    if (do_64bit) {
       uint64_t *dst64 = (uint64_t *) dst;
       dst64[idx] = value;
    } else {
       uint32_t *dst32 = (uint32_t *) dst;
-      dst32[idx] = value;
+      dst32[idx] = (uint32_t) value;
+   }
+}
+
+/* Stores the 32-bit counter read from the query's mapped BO in *value and
+ * the query's availability in *available. With do_wait set, a query that
+ * was not flagged as maybe_available or whose BO wait fails returns
+ * VK_ERROR_DEVICE_LOST and leaves both outputs unwritten.
+ */
+static VkResult
+get_occlusion_query_result(struct v3dv_device *device,
+                           struct v3dv_query_pool *pool,
+                           uint32_t query,
+                           bool do_wait,
+                           bool *available,
+                           uint64_t *value)
+{
+   assert(pool && pool->query_type == VK_QUERY_TYPE_OCCLUSION);
+
+   struct v3dv_query *q = &pool->queries[query];
+   assert(q->bo && q->bo->map);
+
+   if (do_wait) {
+      /* From the Vulkan 1.0 spec:
+       *
+       *    "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
+       *    become available in a finite amount of time (e.g. due to not
+       *    issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
+       *    error may occur."
+       */
+      if (!q->maybe_available)
+         return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+
+      if (!v3dv_bo_wait(device, q->bo, 0xffffffffffffffffull))
+         return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+
+      *available = true;
+   } else {
+      *available = q->maybe_available && v3dv_bo_wait(device, q->bo, 0);
+   }
+
+   *value = (uint64_t) *((uint32_t *) q->bo->map);
+   return VK_SUCCESS;
+}
+
+/* Stores the CPU-recorded timestamp of the query in *value and its
+ * availability in *available. With do_wait set, a query that was not
+ * flagged as maybe_available returns VK_ERROR_DEVICE_LOST and leaves both
+ * outputs unwritten.
+ */
+static VkResult
+get_timestamp_query_result(struct v3dv_device *device,
+                           struct v3dv_query_pool *pool,
+                           uint32_t query,
+                           bool do_wait,
+                           bool *available,
+                           uint64_t *value)
+{
+   assert(pool && pool->query_type == VK_QUERY_TYPE_TIMESTAMP);
+
+   struct v3dv_query *q = &pool->queries[query];
+
+   if (do_wait) {
+      /* From the Vulkan 1.0 spec:
+       *
+       *    "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
+       *    become available in a finite amount of time (e.g. due to not
+       *    issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
+       *    error may occur."
+       */
+      if (!q->maybe_available)
+         return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+
+      *available = true;
+   } else {
+      *available = q->maybe_available;
+   }
+
+   *value = q->value;
+   return VK_SUCCESS;
+}
+
+/* Dispatches to the result getter that matches the pool's query type and
+ * forwards its VkResult so callers can propagate wait failures.
+ */
+static VkResult
+get_query_result(struct v3dv_device *device,
+                 struct v3dv_query_pool *pool,
+                 uint32_t query,
+                 bool do_wait,
+                 bool *available,
+                 uint64_t *value)
+{
+   switch (pool->query_type) {
+   case VK_QUERY_TYPE_OCCLUSION:
+      return get_occlusion_query_result(device, pool, query, do_wait,
+                                        available, value);
+   case VK_QUERY_TYPE_TIMESTAMP:
+      return get_timestamp_query_result(device, pool, query, do_wait,
+                                        available, value);
+   default:
+      unreachable("Unsupported query type");
    }
 }
 
@@ -135,30 +248,12 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
 
    VkResult result = VK_SUCCESS;
    for (uint32_t i = first; i < first + count; i++) {
-      assert(pool->queries[i].bo && pool->queries[i].bo->map);
-      struct v3dv_bo *bo = pool->queries[i].bo;
-      const uint32_t *counter = (const uint32_t *) bo->map;
-
       bool available;
-      if (do_wait) {
-         /* From the Vulkan 1.0 spec:
-          *
-          *    "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
-          *    become available in a finite amount of time (e.g. due to not
-          *    issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
-          *    error may occur."
-          */
-         if (!pool->queries[i].maybe_available)
-            return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
-
-         if (!v3dv_bo_wait(device, bo, 0xffffffffffffffffull))
-            return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
-
-         available = true;
-      } else {
-         available = pool->queries[i].maybe_available &&
-                     v3dv_bo_wait(device, bo, 0);
-      }
+      uint64_t value = 0;
+      VkResult query_result =
+         get_query_result(device, pool, i, do_wait, &available, &value);
+      if (query_result != VK_SUCCESS)
+         return query_result;
 
       /**
        * From the Vulkan 1.0 spec:
@@ -174,7 +269,7 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
 
       const bool write_result = available || do_partial;
       if (write_result)
-         write_query_result(data, slot, do_64bit, *counter);
+         write_query_result(data, slot, do_64bit, value);
       slot++;
 
       if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
diff --git a/src/broadcom/vulkan/v3dv_queue.c b/src/broadcom/vulkan/v3dv_queue.c
index 0899e748ce6..722e6b4b42e 100644
--- a/src/broadcom/vulkan/v3dv_queue.c
+++ b/src/broadcom/vulkan/v3dv_queue.c
@@ -154,22 +154,37 @@ static VkResult
 handle_reset_query_cpu_job(struct v3dv_job *job)
 {
    /* We are about to reset query counters so we need to make sure that
-    * The GPU is not using them.
+    * the GPU is not using them. The exception is timestamp queries, since
+    * we handle those in the CPU.
     *
     * FIXME: we could avoid blocking the main thread for this if we use
     * submission thread.
     */
-   VkResult result = gpu_queue_wait_idle(&job->device->queue);
-   if (result != VK_SUCCESS)
-      return result;
-
    struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset;
+   assert(info->pool);
+
+   if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
+      VkResult result = gpu_queue_wait_idle(&job->device->queue);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
    for (uint32_t i = info->first; i < info->first + info->count; i++) {
       assert(i < info->pool->query_count);
       struct v3dv_query *query = &info->pool->queries[i];
       query->maybe_available = false;
-      uint32_t *counter = (uint32_t *) query->bo->map;
-      *counter = 0;
+      switch (info->pool->query_type) {
+      case VK_QUERY_TYPE_OCCLUSION: {
+         uint32_t *counter = (uint32_t *) query->bo->map;
+         *counter = 0;
+         break;
+      }
+      case VK_QUERY_TYPE_TIMESTAMP:
+         query->value = 0;
+         break;
+      default:
+         unreachable("Unsupported query type");
+      }
    }
 
    return VK_SUCCESS;
@@ -419,6 +434,32 @@ handle_copy_buffer_to_image_cpu_job(struct v3dv_job *job)
    return VK_SUCCESS;
 }
 
+/* CPU job that records a CLOCK_MONOTONIC timestamp, in nanoseconds, for the
+ * job's query once v3dv_QueueWaitIdle() has returned successfully.
+ */
+static VkResult
+handle_timestamp_query_cpu_job(struct v3dv_job *job)
+{
+   assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY);
+   struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp;
+
+   /* Wait for completion of all work queued before the timestamp query */
+   VkResult result =
+      v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Compute timestamp */
+   struct timespec t;
+   clock_gettime(CLOCK_MONOTONIC, &t);
+   assert(info->query < info->pool->query_count);
+   struct v3dv_query *query = &info->pool->queries[info->query];
+   query->maybe_available = true;
+   query->value = t.tv_sec * 1000000000ull + t.tv_nsec;
+
+   return VK_SUCCESS;
+}
+
 static VkResult
 handle_csd_job(struct v3dv_queue *queue,
                struct v3dv_job *job,
@@ -705,6 +746,8 @@ queue_submit_job(struct v3dv_queue *queue,
       return handle_copy_buffer_to_image_cpu_job(job);
    case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
       return handle_csd_indirect_cpu_job(queue, job, do_sem_wait);
+   case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY:
+      return handle_timestamp_query_cpu_job(job);
    default:
       unreachable("Unhandled job type");
    }