v3dv: add support for timestamp queries

V3D doesn't provide any means to acquire timestamps from the GPU, so we have to implement them on the CPU.

v2: enable timestampComputeAndGraphics and set timestampPeriod (Piñeiro)

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7373>
2025-12-24 15:20:10 +01:00 · 2020-10-29 11:55:23 +01:00 · 2020-10-29 11:55:23 +01:00 · 12f87b6e7c
commit 12f87b6e7c
parent 520f3e27b5
5 changed files with 200 additions and 53 deletions
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@ -5021,7 +5021,30 @@ v3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
                       VkQueryPool queryPool,
                       uint32_t query)
 {
-   unreachable("Timestamp queries are not supported.");
+   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+   V3DV_FROM_HANDLE(v3dv_query_pool, query_pool, queryPool);
+
+   /* If this is called inside a render pass we need to finish the current
+    * job here...
+    */
+   if (cmd_buffer->state.pass)
+      v3dv_cmd_buffer_finish_job(cmd_buffer);
+
+   struct v3dv_job *job =
+      v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
+                                     V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
+                                     cmd_buffer, -1);
+   v3dv_return_if_oom(cmd_buffer, NULL);
+
+   job->cpu.query_timestamp.pool = query_pool;
+   job->cpu.query_timestamp.query = query;
+
+   list_addtail(&job->list_link, &cmd_buffer->jobs);
+   cmd_buffer->state.job = NULL;
+
+   /* ...and resume the subpass after the timestamp */
+   if (cmd_buffer->state.pass)
+      v3dv_cmd_buffer_subpass_resume(cmd_buffer, cmd_buffer->state.subpass_idx);
 }

 static void
--- a/src/broadcom/vulkan/v3dv_device.c
+++ b/src/broadcom/vulkan/v3dv_device.c
@ -814,6 +814,11 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
   const VkSampleCountFlags supported_sample_counts =
      VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;

+   struct timespec clock_res;
+   clock_getres(CLOCK_MONOTONIC, &clock_res);
+   const float timestamp_period =
+      clock_res.tv_sec * 1000000000.0f + clock_res.tv_nsec;
+
   /* FIXME: this will probably require an in-depth review */
   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D                      = 4096,
@ -923,8 +928,8 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
      .sampledImageStencilSampleCounts          = supported_sample_counts,
      .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords                       = 1,
-      .timestampComputeAndGraphics              = false,
-      .timestampPeriod                          = 0.0f,
+      .timestampComputeAndGraphics              = true,
+      .timestampPeriod                          = timestamp_period,
      .maxClipDistances                         = 8,
      .maxCullDistances                         = 0,
      .maxCombinedClipAndCullDistances          = 8,
@ -990,7 +995,7 @@ v3dv_queue_family_properties = {
                 VK_QUEUE_COMPUTE_BIT |
                 VK_QUEUE_TRANSFER_BIT,
   .queueCount = 1,
-   .timestampValidBits = 0, /* FIXME */
+   .timestampValidBits = 64,
   .minImageTransferGranularity = { 1, 1, 1 },
 };

--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@ -744,6 +744,7 @@ enum v3dv_job_type {
   V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS,
   V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
   V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
+   V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
 };

 struct v3dv_reset_query_cpu_job_info {
@ -810,6 +811,11 @@ struct v3dv_csd_indirect_cpu_job_info {
   bool needs_wg_uniform_rewrite;
 };

+struct v3dv_timestamp_query_cpu_job_info {
+   struct v3dv_query_pool *pool;
+   uint32_t query;
+};
+
 struct v3dv_job {
   struct list_head list_link;

@ -881,6 +887,7 @@ struct v3dv_job {
      struct v3dv_clear_attachments_cpu_job_info    clear_attachments;
      struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
      struct v3dv_csd_indirect_cpu_job_info         csd_indirect;
+      struct v3dv_timestamp_query_cpu_job_info      query_timestamp;
   } cpu;

   /* Job specs for TFU jobs */
@ -1084,10 +1091,14 @@ struct v3dv_resource {

 struct v3dv_query {
   bool maybe_available;
-   struct v3dv_bo *bo;
+   union {
+      struct v3dv_bo *bo; /* Used by GPU queries (occlusion) */
+      uint64_t value; /* Used by CPU queries (timestamp) */
+   };
 };

 struct v3dv_query_pool {
+   VkQueryType query_type;
   uint32_t query_count;
   struct v3dv_query *queries;
 };
--- a/src/broadcom/vulkan/v3dv_query.c
+++ b/src/broadcom/vulkan/v3dv_query.c
@ -31,12 +31,12 @@ v3dv_CreateQueryPool(VkDevice _device,
 {
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

-   assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION);
+   assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION ||
+          pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP);
   assert(pCreateInfo->queryCount > 0);

-
   /* FIXME: the hw allows us to allocate up to 16 queries in a single block
-    *        so we should try to use that.
+    *        for occlusion queries so we should try to use that.
    */
   struct v3dv_query_pool *pool =
      vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
@ -44,6 +44,7 @@ v3dv_CreateQueryPool(VkDevice _device,
   if (pool == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

+   pool->query_type = pCreateInfo->queryType;
   pool->query_count = pCreateInfo->queryCount;

   VkResult result;
@ -59,16 +60,24 @@ v3dv_CreateQueryPool(VkDevice _device,
   uint32_t i;
   for (i = 0; i < pool->query_count; i++) {
      pool->queries[i].maybe_available = false;
-      pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true);
-      if (!pool->queries[i].bo) {
-         result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
-         goto fail_alloc_bo;
-      }
-
-      /* For occlusion queries we only need a 4-byte counter */
-      if (!v3dv_bo_map(device, pool->queries[i].bo, 4)) {
-         result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
-         goto fail_alloc_bo;
+      switch (pool->query_type) {
+      case VK_QUERY_TYPE_OCCLUSION:
+         pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true);
+         if (!pool->queries[i].bo) {
+            result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+            goto fail_alloc_bo;
+         }
+         /* For occlusion queries we only need a 4-byte counter */
+         if (!v3dv_bo_map(device, pool->queries[i].bo, 4)) {
+            result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+            goto fail_alloc_bo;
+         }
+         break;
+      case VK_QUERY_TYPE_TIMESTAMP:
+         pool->queries[i].value = 0;
+         break;
+      default:
+         unreachable("Unsupported query type");
      }
   }

@ -98,21 +107,105 @@ v3dv_DestroyQueryPool(VkDevice _device,
   if (!pool)
      return;

-   for (uint32_t i = 0; i < pool->query_count; i++)
-      v3dv_bo_free(device, pool->queries[i].bo);
+   if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
+      for (uint32_t i = 0; i < pool->query_count; i++)
+         v3dv_bo_free(device, pool->queries[i].bo);
+   }
+
   vk_free2(&device->alloc, pAllocator, pool->queries);
   vk_free2(&device->alloc, pAllocator, pool);
 }

 static void
-write_query_result(void *dst, uint32_t idx, bool do_64bit, uint32_t value)
+write_query_result(void *dst, uint32_t idx, bool do_64bit, uint64_t value)
 {
   if (do_64bit) {
      uint64_t *dst64 = (uint64_t *) dst;
      dst64[idx] = value;
   } else {
      uint32_t *dst32 = (uint32_t *) dst;
-      dst32[idx] = value;
+      dst32[idx] = (uint32_t) value;
+   }
+}
+
+static uint64_t
+get_occlusion_query_result(struct v3dv_device *device,
+                           struct v3dv_query_pool *pool,
+                           uint32_t query,
+                           bool do_wait,
+                           bool *available)
+{
+   assert(pool && pool->query_type == VK_QUERY_TYPE_OCCLUSION);
+
+   struct v3dv_query *q = &pool->queries[query];
+   assert(q->bo && q->bo->map);
+
+   if (do_wait) {
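+      /* From the Vulkan 1.0 spec:
+       *
+       *    "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
+       *     become available in a finite amount of time (e.g. due to not
+       *     issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
+       *     error may occur."
+       *
+       * NOTE(review): this function returns uint64_t, so the vk_error()
+       * results returned below reach the caller as a query value rather
+       * than as an error code, and *available is left unwritten on those
+       * paths. Consider returning a VkResult and handing the value back
+       * through an out parameter.
+       */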
+      if (!q->maybe_available)
+         return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+
+      if (!v3dv_bo_wait(device, q->bo, 0xffffffffffffffffull))
+         return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+
+      *available = true;
+   } else {
+      *available = q->maybe_available && v3dv_bo_wait(device, q->bo, 0);
+   }
+
+   return (uint64_t) *((uint32_t *) q->bo->map);
+}
+
+static uint64_t
+get_timestamp_query_result(struct v3dv_device *device,
+                           struct v3dv_query_pool *pool,
+                           uint32_t query,
+                           bool do_wait,
+                           bool *available)
+{
+   assert(pool && pool->query_type == VK_QUERY_TYPE_TIMESTAMP);
+
+   struct v3dv_query *q = &pool->queries[query];
+
+   if (do_wait) {
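+      /* From the Vulkan 1.0 spec:
+       *
+       *    "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
+       *     become available in a finite amount of time (e.g. due to not
+       *     issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
+       *     error may occur."
+       *
+       * NOTE(review): this function returns uint64_t, so the vk_error()
+       * result returned below reaches the caller as a query value rather
+       * than as an error code, and *available is left unwritten on that
+       * path. Consider returning a VkResult and handing the value back
+       * through an out parameter.
+       */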
+      if (!q->maybe_available)
+         return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+
+      *available = true;
+   } else {
+      *available = q->maybe_available;
+   }
+
+   return q->value;
+}
+
+static uint64_t
+get_query_result(struct v3dv_device *device,
+                 struct v3dv_query_pool *pool,
+                 uint32_t query,
+                 bool do_wait,
+                 bool *available)
+{
+   switch (pool->query_type) {
+   case VK_QUERY_TYPE_OCCLUSION:
+      return get_occlusion_query_result(device, pool, query, do_wait, available);
+   case VK_QUERY_TYPE_TIMESTAMP:
+      return get_timestamp_query_result(device, pool, query, do_wait, available);
+   default:
+      unreachable("Unsupported query type");
   }
 }

@ -135,30 +228,8 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device,

   VkResult result = VK_SUCCESS;
   for (uint32_t i = first; i < first + count; i++) {
-      assert(pool->queries[i].bo && pool->queries[i].bo->map);
-      struct v3dv_bo *bo = pool->queries[i].bo;
-      const uint32_t *counter = (const uint32_t *) bo->map;
-
      bool available;
-      if (do_wait) {
-         /* From the Vulkan 1.0 spec:
-          *
-          *    "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
-          *     become available in a finite amount of time (e.g. due to not
-          *     issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
-          *     error may occur."
-          */
-         if (!pool->queries[i].maybe_available)
-            return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
-
-         if (!v3dv_bo_wait(device, bo, 0xffffffffffffffffull))
-            return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
-
-         available = true;
-      } else {
-         available = pool->queries[i].maybe_available &&
-                     v3dv_bo_wait(device, bo, 0);
-      }
+      uint64_t value = get_query_result(device, pool, i, do_wait, &available);

      /**
       * From the Vulkan 1.0 spec:
@ -174,7 +245,7 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device,

      const bool write_result = available || do_partial;
      if (write_result)
-         write_query_result(data, slot, do_64bit, *counter);
+         write_query_result(data, slot, do_64bit, value);
      slot++;

      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
--- a/src/broadcom/vulkan/v3dv_queue.c
+++ b/src/broadcom/vulkan/v3dv_queue.c
@ -154,22 +154,37 @@ static VkResult
 handle_reset_query_cpu_job(struct v3dv_job *job)
 {
   /* We are about to reset query counters so we need to make sure that
-    * The GPU is not using them.
+    * the GPU is not using them. The exception is timestamp queries, since
+    * we handle those on the CPU.
    *
    * FIXME: we could avoid blocking the main thread for this if we use
    *        submission thread.
    */
-   VkResult result = gpu_queue_wait_idle(&job->device->queue);
-   if (result != VK_SUCCESS)
-      return result;
-
   struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset;
+   assert(info->pool);
+
+   if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
+      VkResult result = gpu_queue_wait_idle(&job->device->queue);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
   for (uint32_t i = info->first; i < info->first + info->count; i++) {
      assert(i < info->pool->query_count);
      struct v3dv_query *query = &info->pool->queries[i];
      query->maybe_available = false;
-      uint32_t *counter = (uint32_t *) query->bo->map;
-      *counter = 0;
+      switch (info->pool->query_type) {
+      case VK_QUERY_TYPE_OCCLUSION: {
+         uint32_t *counter = (uint32_t *) query->bo->map;
+         *counter = 0;
+         break;
+      }
+      case VK_QUERY_TYPE_TIMESTAMP:
+         query->value = 0;
+         break;
+      default:
+         unreachable("Unsupported query type");
+      }
   }

   return VK_SUCCESS;
@ -419,6 +434,26 @@ handle_copy_buffer_to_image_cpu_job(struct v3dv_job *job)
   return VK_SUCCESS;
 }

+static VkResult
+handle_timestamp_query_cpu_job(struct v3dv_job *job)
+{
+   assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY);
+   struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp;
+
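+   /* Wait for completion of all work queued before the timestamp query.
+    *
+    * NOTE(review): the return value of v3dv_QueueWaitIdle() is discarded
+    * here; if the wait can fail, the timestamp is still recorded and
+    * VK_SUCCESS is returned. TODO confirm whether the result should be
+    * propagated.
+    */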
+   v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));
+
+   /* Compute timestamp */
+   struct timespec t;
+   clock_gettime(CLOCK_MONOTONIC, &t);
+   assert(info->query < info->pool->query_count);
+   struct v3dv_query *query = &info->pool->queries[info->query];
+   query->maybe_available = true;
+   query->value = t.tv_sec * 1000000000ull + t.tv_nsec;
+
+   return VK_SUCCESS;
+}
+
 static VkResult
 handle_csd_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
@ -705,6 +740,8 @@ queue_submit_job(struct v3dv_queue *queue,
      return handle_copy_buffer_to_image_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
      return handle_csd_indirect_cpu_job(queue, job, do_sem_wait);
+   case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY:
+      return handle_timestamp_query_cpu_job(job);
   default:
      unreachable("Unhandled job type");
   }