v3dv: switch timestamp queries to using BO memory

Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Maíra Canal <mcanal@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26448>
Author: Iago Toral Quiroga <itoral@igalia.com>
Date: 2023-07-11 09:21:56 +02:00 (committed by Marge Bot)
Parent: e404ccba5b
Commit: 2a0d196abf
3 changed files with 118 additions and 28 deletions

src/broadcom/vulkan/v3dv_private.h

@@ -1650,8 +1650,14 @@ struct v3dv_query {
       uint32_t offset;
    } occlusion;
 
-   /* Used by CPU queries (timestamp) */
-   uint64_t value;
+   /* Used by timestamp queries */
+   struct {
+      /* Offset of this query in the timestamp BO for its value */
+      uint32_t offset;
+      /* Syncobj to signal timestamp query availability */
+      struct vk_sync *sync;
+   } timestamp;
 
    /* Used by performance queries */
    struct v3dv_perf_query perf;

@@ -1684,6 +1690,12 @@ struct v3dv_query_pool {
       uint32_t avail_offset;
    } occlusion;
 
+   /* Only used with timestamp queries */
+   struct {
+      /* BO with the query timestamp values */
+      struct v3dv_bo *bo;
+   } timestamp;
+
    /* Only used with performance queries */
    struct {
       uint32_t ncounters;
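The layout these structs describe is simple: every query owns an 8-byte slot in the pool's timestamp BO, and the per-query timestamp.offset indexes into that packed array. A minimal sketch of the addressing (illustrative helper, not part of the driver; bo_map stands in for pool->timestamp.bo->map):

   #include <stdint.h>

   /* Each query's 64-bit timestamp value lives at query_idx * 8 from
    * the start of the BO, matching the timestamp.offset computed at
    * pool creation time. */
   static inline uint64_t *
   timestamp_slot(void *bo_map, uint32_t query_idx)
   {
      return (uint64_t *)((uint8_t *)bo_map + query_idx * 8);
   }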

src/broadcom/vulkan/v3dv_query.c

@@ -313,8 +313,22 @@ v3dv_CreateQueryPool(VkDevice _device,
       assert(pool->perfmon.nperfmons <= V3DV_MAX_PERFMONS);
       break;
    }
-   case VK_QUERY_TYPE_TIMESTAMP:
+   case VK_QUERY_TYPE_TIMESTAMP: {
+      /* 8 bytes per query used for the timestamp value. We have all
+       * timestamps tightly packed first in the buffer.
+       */
+      const uint32_t bo_size = pool->query_count * 8;
+      pool->timestamp.bo = v3dv_bo_alloc(device, bo_size, "query:t", true);
+      if (!pool->timestamp.bo) {
+         result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+         goto fail;
+      }
+      if (!v3dv_bo_map(device, pool->timestamp.bo, bo_size)) {
+         result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+         goto fail;
+      }
       break;
+   }
    default:
       unreachable("Unsupported query type");
    }
@@ -330,7 +344,12 @@ v3dv_CreateQueryPool(VkDevice _device,
          break;
       }
       case VK_QUERY_TYPE_TIMESTAMP:
-         pool->queries[query_idx].value = 0;
+         pool->queries[query_idx].timestamp.offset = query_idx * 8;
+         result = vk_sync_create(&device->vk,
+                                 &device->pdevice->drm_syncobj_type, 0, 0,
+                                 &pool->queries[query_idx].timestamp.sync);
+         if (result != VK_SUCCESS)
+            goto fail;
          break;
       case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
          result = vk_sync_create(&device->vk,
@@ -358,6 +377,11 @@ v3dv_CreateQueryPool(VkDevice _device,
    return VK_SUCCESS;
 
 fail:
+   if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
+      for (uint32_t j = 0; j < query_idx; j++)
+         vk_sync_destroy(&device->vk, pool->queries[j].timestamp.sync);
+   }
+
    if (pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       for (uint32_t j = 0; j < query_idx; j++)
         vk_sync_destroy(&device->vk, pool->queries[j].perf.last_job_sync);
@@ -365,6 +389,8 @@ fail:
    if (pool->occlusion.bo)
       v3dv_bo_free(device, pool->occlusion.bo);
+   if (pool->timestamp.bo)
+      v3dv_bo_free(device, pool->timestamp.bo);
    if (pool->queries)
       vk_free2(&device->vk.alloc, pAllocator, pool->queries);
 
    pool_destroy_meta_resources(device, pool);
@@ -387,6 +413,14 @@ v3dv_DestroyQueryPool(VkDevice _device,
    if (pool->occlusion.bo)
       v3dv_bo_free(device, pool->occlusion.bo);
 
+   if (pool->timestamp.bo)
+      v3dv_bo_free(device, pool->timestamp.bo);
+
+   if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
+      for (uint32_t i = 0; i < pool->query_count; i++)
+         vk_sync_destroy(&device->vk, pool->queries[i].timestamp.sync);
+   }
+
    if (pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       for (uint32_t i = 0; i < pool->query_count; i++) {
          kperfmon_destroy(device, pool, i);
@@ -421,9 +455,9 @@ query_wait_available(struct v3dv_device *device,
                      uint32_t query_idx)
 {
    /* For occlusion queries we prefer to poll the availability BO in a loop
-    * to waiting on the occlusion query results BO, because the latter would
-    * make us wait for any job running occlusion queries, even if those queries
-    * do not involve the one we want to wait on.
+    * to waiting on the query results BO, because the latter would
+    * make us wait for any job running queries from the pool, even if those
+    * queries do not involve the one we want to wait on.
     */
    if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
       uint8_t *q_addr = ((uint8_t *) pool->occlusion.bo->map) +
@@ -433,12 +467,19 @@ query_wait_available(struct v3dv_device *device,
       return VK_SUCCESS;
    }
 
-   /* For other queries we need to wait for the queue to signal that
+   if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
+      if (vk_sync_wait(&device->vk, q->timestamp.sync,
+                       0, VK_SYNC_WAIT_COMPLETE, UINT64_MAX) != VK_SUCCESS) {
+         return vk_device_set_lost(&device->vk, "Query job wait failed");
+      }
+      return VK_SUCCESS;
+   }
+
+   assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);
+
+   /* For performance queries we need to wait for the queue to signal that
     * the query has been submitted for execution before anything else.
     */
-   assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR ||
-          pool->query_type == VK_QUERY_TYPE_TIMESTAMP);
    VkResult result = VK_SUCCESS;
    if (!q->maybe_available) {
       struct timespec timeout;
@@ -485,18 +526,28 @@ query_check_available(struct v3dv_device *device,
                       struct v3dv_query *q,
                       uint32_t query_idx)
 {
-   /* For occlusion and performance queries we check the availability BO */
+   /* For occlusion we check the availability BO */
    if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
       const uint8_t *q_addr = ((uint8_t *) pool->occlusion.bo->map) +
                               pool->occlusion.avail_offset + query_idx;
       return (*q_addr != 0) ? VK_SUCCESS : VK_NOT_READY;
    }
 
+   /* For timestamp queries, we need to check if the relevant job
+    * has completed.
+    */
+   if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
+      if (vk_sync_wait(&device->vk, q->timestamp.sync,
+                       0, VK_SYNC_WAIT_COMPLETE, 0) != VK_SUCCESS) {
+         return VK_NOT_READY;
+      }
+      return VK_SUCCESS;
+   }
+
    /* For other queries we need to check if the queue has submitted the query
     * for execution at all.
     */
-   assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR ||
-          pool->query_type == VK_QUERY_TYPE_TIMESTAMP);
+   assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);
 
    if (!q->maybe_available)
       return VK_NOT_READY;
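Both timestamp branches above lean on the same vk_sync_wait entry point from Mesa's common Vulkan runtime; only the timeout differs. query_wait_available passes UINT64_MAX to block until the syncobj signals, while query_check_available passes 0 so the call degenerates to a poll. A minimal sketch of the two modes (hypothetical wrappers, assuming the vk_sync runtime headers are available):

   #include <stdbool.h>
   #include <stdint.h>
   /* Assumes Mesa's common runtime: vulkan/runtime/vk_sync.h */

   /* Poll: a zero timeout returns immediately and succeeds only if
    * the syncobj is already signaled. */
   static bool
   sync_is_signaled(struct vk_device *dev, struct vk_sync *sync)
   {
      return vk_sync_wait(dev, sync, 0, VK_SYNC_WAIT_COMPLETE, 0) == VK_SUCCESS;
   }

   /* Block: an effectively infinite timeout waits for the signal. */
   static VkResult
   sync_block(struct vk_device *dev, struct vk_sync *sync)
   {
      return vk_sync_wait(dev, sync, 0, VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
   }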
@@ -521,9 +572,6 @@ query_is_available(struct v3dv_device *device,
 {
    struct v3dv_query *q = &pool->queries[query];
 
-   assert(pool->query_type != VK_QUERY_TYPE_OCCLUSION ||
-          (pool->occlusion.bo && pool->occlusion.bo->map));
-
    if (do_wait) {
       VkResult result = query_wait_available(device, pool, q, query);
       if (result != VK_SUCCESS) {
@@ -575,7 +623,10 @@ write_timestamp_query_result(struct v3dv_device *device,
    struct v3dv_query *q = &pool->queries[query];
 
-   write_to_buffer(data, slot, do_64bit, q->value);
+   const uint8_t *query_addr =
+      ((uint8_t *) pool->timestamp.bo->map) + q->timestamp.offset;
+
+   write_to_buffer(data, slot, do_64bit, *((uint64_t *) query_addr));
 
    return VK_SUCCESS;
 }
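write_to_buffer itself is untouched by this commit and not shown in the diff; a helper of that shape typically stores the value into the caller's results buffer at the requested slot, truncating to 32 bits unless the app asked for VK_QUERY_RESULT_64_BIT. A sketch under that assumption:

   #include <stdbool.h>
   #include <stdint.h>

   /* Sketch: store one query result into the user buffer at the given
    * slot, as 64-bit or truncated 32-bit depending on the request. */
   static void
   write_to_buffer(void *data, uint32_t slot, bool do_64bit, uint64_t value)
   {
      if (do_64bit) {
         uint64_t *data64 = data;
         data64[slot] = value;
      } else {
         uint32_t *data32 = data;
         data32[slot] = (uint32_t) value;
      }
   }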
@@ -1227,6 +1278,24 @@ v3dv_reset_query_pool_cpu(struct v3dv_device *device,
 {
    mtx_lock(&device->query_mutex);
 
+   if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
+      assert(first + count <= pool->query_count);
+
+      /* Reset timestamp */
+      uint8_t *base_addr;
+      base_addr = ((uint8_t *) pool->timestamp.bo->map) +
+                  pool->queries[first].timestamp.offset;
+      memset(base_addr, 0, 8 * count);
+
+      for (uint32_t i = first; i < first + count; i++) {
+         if (vk_sync_reset(&device->vk, pool->queries[i].timestamp.sync) != VK_SUCCESS)
+            fprintf(stderr, "Failed to reset sync");
+      }
+
+      mtx_unlock(&device->query_mutex);
+      return;
+   }
+
    for (uint32_t i = first; i < first + count; i++) {
       assert(i < pool->query_count);
       struct v3dv_query *q = &pool->queries[i];

@@ -1245,9 +1314,6 @@ v3dv_reset_query_pool_cpu(struct v3dv_device *device,
          *counter = 0;
          break;
       }
-      case VK_QUERY_TYPE_TIMESTAMP:
-         q->value = 0;
-         break;
       case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
          kperfmon_destroy(device, pool, i);
          kperfmon_create(device, pool, i);
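Because the values are tightly packed, resetting a contiguous query range collapses into a single memset starting at the first query's offset. A worked illustration with hypothetical values:

   #include <assert.h>
   #include <stdint.h>

   int
   main(void)
   {
      const uint32_t first = 4, count = 3;   /* hypothetical reset range */
      const uint32_t start = first * 8;      /* == queries[4].timestamp.offset */
      const uint32_t length = 8 * count;     /* bytes cleared by the memset */
      /* Clears BO bytes [32, 56): the 8-byte slots of queries 4, 5 and 6. */
      assert(start == 32 && length == 24);
      return 0;
   }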

src/broadcom/vulkan/v3dv_queue.c

@@ -316,12 +316,19 @@ handle_reset_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
    assert(info->pool);
 
    /* We are about to reset query counters so we need to make sure that
-    * The GPU is not using them. The exception is timestamp queries, since
-    * we handle those in the CPU.
+    * The GPU is not using them.
     */
    if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION)
       v3dv_bo_wait(job->device, info->pool->occlusion.bo, OS_TIMEOUT_INFINITE);
 
+   if (info->pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
+      VkResult result = queue_wait_idle(queue, sync_info);
+      if (result != VK_SUCCESS)
+         return result;
+      v3dv_bo_wait(job->device, info->pool->timestamp.bo, OS_TIMEOUT_INFINITE);
+   }
+
    if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       struct vk_sync_wait waits[info->count];
       unsigned wait_count = 0;
@@ -409,8 +416,7 @@ handle_end_query_cpu_job(struct v3dv_job *job, uint32_t counter_pass_idx)
    int err = 0;
    int fd = -1;
 
-   assert(info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR ||
-          info->pool->query_type == VK_QUERY_TYPE_TIMESTAMP);
+   assert(info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);
 
    if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       result = export_perfmon_last_job_sync(queue, job, &fd);
@@ -502,14 +508,20 @@ handle_timestamp_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
       assert(info->query + i < info->pool->query_count);
 
       struct v3dv_query *query = &info->pool->queries[info->query + i];
       query->maybe_available = true;
 
-      if (i == 0)
-         query->value = t.tv_sec * 1000000000ull + t.tv_nsec;
+      /* Value */
+      uint8_t *value_addr =
+         ((uint8_t *) info->pool->timestamp.bo->map) + query->timestamp.offset;
+      *((uint64_t *) value_addr) = (i == 0) ? t.tv_sec * 1000000000ull + t.tv_nsec : 0ull;
+
+      /* Availability */
+      result = vk_sync_signal(&job->device->vk, query->timestamp.sync, 0);
    }
 
    cnd_broadcast(&job->device->query_ended);
    mtx_unlock(&job->device->query_mutex);
 
-   return VK_SUCCESS;
+   return result;
 }
 
 static VkResult
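For context, t in the loop above is a CPU-side struct timespec sampled when this CPU job runs (the sampling itself is outside this hunk), and the stored value is its nanosecond folding t.tv_sec * 1000000000ull + t.tv_nsec. A self-contained sketch of that conversion, assuming a monotonic clock (the actual clock choice is not visible in this diff):

   #include <stdint.h>
   #include <time.h>

   /* Sample a CPU timestamp and pack it into the 64-bit nanosecond
    * format stored in the query's BO slot. */
   static uint64_t
   sample_timestamp_ns(void)
   {
      struct timespec t;
      clock_gettime(CLOCK_MONOTONIC, &t);
      return t.tv_sec * 1000000000ull + t.tv_nsec;
   }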