diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h index 7b6e9a38f12..ca46b291a77 100644 --- a/src/broadcom/vulkan/v3dv_private.h +++ b/src/broadcom/vulkan/v3dv_private.h @@ -1650,8 +1650,14 @@ struct v3dv_query { uint32_t offset; } occlusion; - /* Used by CPU queries (timestamp) */ - uint64_t value; + /* Used by timestamp queries */ + struct { + /* Offset of this query in the timestamp BO for its value */ + uint32_t offset; + + /* Syncobj to signal timestamp query availability */ + struct vk_sync *sync; + } timestamp; /* Used by performance queries */ struct v3dv_perf_query perf; @@ -1684,6 +1690,12 @@ struct v3dv_query_pool { uint32_t avail_offset; } occlusion; + /* Only used with timestamp queries */ + struct { + /* BO with the query timestamp values */ + struct v3dv_bo *bo; + } timestamp; + /* Only used with performance queries */ struct { uint32_t ncounters; diff --git a/src/broadcom/vulkan/v3dv_query.c b/src/broadcom/vulkan/v3dv_query.c index d6f93466d20..6a77c9ec621 100644 --- a/src/broadcom/vulkan/v3dv_query.c +++ b/src/broadcom/vulkan/v3dv_query.c @@ -313,8 +313,22 @@ v3dv_CreateQueryPool(VkDevice _device, assert(pool->perfmon.nperfmons <= V3DV_MAX_PERFMONS); break; } - case VK_QUERY_TYPE_TIMESTAMP: + case VK_QUERY_TYPE_TIMESTAMP: { + /* 8 bytes per query used for the timestamp value. We have all + * timestamps tightly packed first in the buffer. + */ + const uint32_t bo_size = pool->query_count * 8; + pool->timestamp.bo = v3dv_bo_alloc(device, bo_size, "query:t", true); + if (!pool->timestamp.bo) { + result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail; + } + if (!v3dv_bo_map(device, pool->timestamp.bo, bo_size)) { + result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail; + } break; + } default: unreachable("Unsupported query type"); } @@ -330,7 +344,12 @@ v3dv_CreateQueryPool(VkDevice _device, break; } case VK_QUERY_TYPE_TIMESTAMP: - pool->queries[query_idx].value = 0; + pool->queries[query_idx].timestamp.offset = query_idx * 8; + result = vk_sync_create(&device->vk, + &device->pdevice->drm_syncobj_type, 0, 0, + &pool->queries[query_idx].timestamp.sync); + if (result != VK_SUCCESS) + goto fail; break; case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: { result = vk_sync_create(&device->vk, @@ -358,6 +377,11 @@ v3dv_CreateQueryPool(VkDevice _device, return VK_SUCCESS; fail: + if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) { + for (uint32_t j = 0; j < query_idx; j++) + vk_sync_destroy(&device->vk, pool->queries[j].timestamp.sync); + } + if (pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) { for (uint32_t j = 0; j < query_idx; j++) vk_sync_destroy(&device->vk, pool->queries[j].perf.last_job_sync); @@ -365,6 +389,8 @@ fail: if (pool->occlusion.bo) v3dv_bo_free(device, pool->occlusion.bo); + if (pool->timestamp.bo) + v3dv_bo_free(device, pool->timestamp.bo); if (pool->queries) vk_free2(&device->vk.alloc, pAllocator, pool->queries); pool_destroy_meta_resources(device, pool); @@ -387,6 +413,14 @@ v3dv_DestroyQueryPool(VkDevice _device, if (pool->occlusion.bo) v3dv_bo_free(device, pool->occlusion.bo); + if (pool->timestamp.bo) + v3dv_bo_free(device, pool->timestamp.bo); + + if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) { + for (uint32_t i = 0; i < pool->query_count; i++) + vk_sync_destroy(&device->vk, pool->queries[i].timestamp.sync); + } + if (pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) { for (uint32_t i = 0; i < pool->query_count; i++) { kperfmon_destroy(device, pool, i); @@ -421,9 +455,9 @@ query_wait_available(struct v3dv_device *device, uint32_t query_idx) { /* For occlusion queries we prefer to poll the availability BO in a loop - * to waiting on the occlusion query results BO, because the latter would - * make us wait for any job running occlusion queries, even if those queries - * do not involve the one we want to wait on. + * to waiting on the query results BO, because the latter would + * make us wait for any job running queries from the pool, even if those + * queries do not involve the one we want to wait on. */ if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) { uint8_t *q_addr = ((uint8_t *) pool->occlusion.bo->map) + @@ -433,12 +467,19 @@ query_wait_available(struct v3dv_device *device, return VK_SUCCESS; } - /* For other queries we need to wait for the queue to signal that + if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) { + if (vk_sync_wait(&device->vk, q->timestamp.sync, + 0, VK_SYNC_WAIT_COMPLETE, UINT64_MAX) != VK_SUCCESS) { + return vk_device_set_lost(&device->vk, "Query job wait failed"); + } + return VK_SUCCESS; + } + + assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR); + + /* For performance queries we need to wait for the queue to signal that * the query has been submitted for execution before anything else. */ - assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR || - pool->query_type == VK_QUERY_TYPE_TIMESTAMP); - VkResult result = VK_SUCCESS; if (!q->maybe_available) { struct timespec timeout; @@ -485,18 +526,28 @@ query_check_available(struct v3dv_device *device, struct v3dv_query *q, uint32_t query_idx) { - /* For occlusion and performance queries we check the availability BO */ + /* For occlusion we check the availability BO */ if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) { const uint8_t *q_addr = ((uint8_t *) pool->occlusion.bo->map) + pool->occlusion.avail_offset + query_idx; return (*q_addr != 0) ? VK_SUCCESS : VK_NOT_READY; } + /* For timestamp queries, we need to check if the relevant job + * has completed. + */ + if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) { + if (vk_sync_wait(&device->vk, q->timestamp.sync, + 0, VK_SYNC_WAIT_COMPLETE, 0) != VK_SUCCESS) { + return VK_NOT_READY; + } + return VK_SUCCESS; + } + /* For other queries we need to check if the queue has submitted the query * for execution at all. */ - assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR || - pool->query_type == VK_QUERY_TYPE_TIMESTAMP); + assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR); if (!q->maybe_available) return VK_NOT_READY; @@ -521,9 +572,6 @@ query_is_available(struct v3dv_device *device, { struct v3dv_query *q = &pool->queries[query]; - assert(pool->query_type != VK_QUERY_TYPE_OCCLUSION || - (pool->occlusion.bo && pool->occlusion.bo->map)); - if (do_wait) { VkResult result = query_wait_available(device, pool, q, query); if (result != VK_SUCCESS) { @@ -575,7 +623,10 @@ write_timestamp_query_result(struct v3dv_device *device, struct v3dv_query *q = &pool->queries[query]; - write_to_buffer(data, slot, do_64bit, q->value); + const uint8_t *query_addr = + ((uint8_t *) pool->timestamp.bo->map) + q->timestamp.offset; + + write_to_buffer(data, slot, do_64bit, *((uint64_t *)query_addr)); return VK_SUCCESS; } @@ -1227,6 +1278,24 @@ v3dv_reset_query_pool_cpu(struct v3dv_device *device, { mtx_lock(&device->query_mutex); + if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) { + assert(first + count <= pool->query_count); + + /* Reset timestamp */ + uint8_t *base_addr; + base_addr = ((uint8_t *) pool->timestamp.bo->map) + + pool->queries[first].timestamp.offset; + memset(base_addr, 0, 8 * count); + + for (uint32_t i = first; i < first + count; i++) { + if (vk_sync_reset(&device->vk, pool->queries[i].timestamp.sync) != VK_SUCCESS) + fprintf(stderr, "Failed to reset sync"); + } + + mtx_unlock(&device->query_mutex); + return; + } + for (uint32_t i = first; i < first + count; i++) { assert(i < pool->query_count); struct v3dv_query *q = &pool->queries[i]; @@ -1245,9 +1314,6 @@ v3dv_reset_query_pool_cpu(struct v3dv_device *device, *counter = 0; break; } - case VK_QUERY_TYPE_TIMESTAMP: - q->value = 0; - break; case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: kperfmon_destroy(device, pool, i); kperfmon_create(device, pool, i); diff --git a/src/broadcom/vulkan/v3dv_queue.c b/src/broadcom/vulkan/v3dv_queue.c index 3f0e06d2642..aede121f6cb 100644 --- a/src/broadcom/vulkan/v3dv_queue.c +++ b/src/broadcom/vulkan/v3dv_queue.c @@ -316,12 +316,19 @@ handle_reset_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job, assert(info->pool); /* We are about to reset query counters so we need to make sure that - * The GPU is not using them. The exception is timestamp queries, since - * we handle those in the CPU. + * The GPU is not using them. */ if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION) v3dv_bo_wait(job->device, info->pool->occlusion.bo, OS_TIMEOUT_INFINITE); + if (info->pool->query_type == VK_QUERY_TYPE_TIMESTAMP) { + VkResult result = queue_wait_idle(queue, sync_info); + if (result != VK_SUCCESS) + return result; + + v3dv_bo_wait(job->device, info->pool->timestamp.bo, OS_TIMEOUT_INFINITE); + } + if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) { struct vk_sync_wait waits[info->count]; unsigned wait_count = 0; @@ -409,8 +416,7 @@ handle_end_query_cpu_job(struct v3dv_job *job, uint32_t counter_pass_idx) int err = 0; int fd = -1; - assert(info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR || - info->pool->query_type == VK_QUERY_TYPE_TIMESTAMP); + assert(info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR); if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) { result = export_perfmon_last_job_sync(queue, job, &fd); @@ -502,14 +508,20 @@ handle_timestamp_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job, assert(info->query + i < info->pool->query_count); struct v3dv_query *query = &info->pool->queries[info->query + i]; query->maybe_available = true; - if (i == 0) - query->value = t.tv_sec * 1000000000ull + t.tv_nsec; + + /* Value */ + uint8_t *value_addr = + ((uint8_t *) info->pool->timestamp.bo->map) + query->timestamp.offset; + *((uint64_t*)value_addr) = (i == 0) ? t.tv_sec * 1000000000ull + t.tv_nsec : 0ull; + + /* Availability */ + result = vk_sync_signal(&job->device->vk, query->timestamp.sync, 0); } cnd_broadcast(&job->device->query_ended); mtx_unlock(&job->device->query_mutex); - return VK_SUCCESS; + return result; } static VkResult