mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 15:20:10 +01:00
v3dv: add support for timestamp queries
V3D doesn't provide any means to acquire timestamps from the GPU, so we have to implement these on the CPU. v2: enable timestampComputeAndGraphics and set timestampPeriod (Piñeiro). Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7373>
This commit is contained in:
parent
520f3e27b5
commit
12f87b6e7c
5 changed files with 200 additions and 53 deletions
|
|
@ -5021,7 +5021,30 @@ v3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
|
|||
VkQueryPool queryPool,
|
||||
uint32_t query)
|
||||
{
|
||||
unreachable("Timestamp queries are not supported.");
|
||||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
V3DV_FROM_HANDLE(v3dv_query_pool, query_pool, queryPool);
|
||||
|
||||
/* If this is called inside a render pass we need to finish the current
|
||||
* job here...
|
||||
*/
|
||||
if (cmd_buffer->state.pass)
|
||||
v3dv_cmd_buffer_finish_job(cmd_buffer);
|
||||
|
||||
struct v3dv_job *job =
|
||||
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
|
||||
V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
|
||||
cmd_buffer, -1);
|
||||
v3dv_return_if_oom(cmd_buffer, NULL);
|
||||
|
||||
job->cpu.query_timestamp.pool = query_pool;
|
||||
job->cpu.query_timestamp.query = query;
|
||||
|
||||
list_addtail(&job->list_link, &cmd_buffer->jobs);
|
||||
cmd_buffer->state.job = NULL;
|
||||
|
||||
/* ...and resume the subpass after the timestamp */
|
||||
if (cmd_buffer->state.pass)
|
||||
v3dv_cmd_buffer_subpass_resume(cmd_buffer, cmd_buffer->state.subpass_idx);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -814,6 +814,11 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
|
|||
const VkSampleCountFlags supported_sample_counts =
|
||||
VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
|
||||
|
||||
struct timespec clock_res;
|
||||
clock_getres(CLOCK_MONOTONIC, &clock_res);
|
||||
const float timestamp_period =
|
||||
clock_res.tv_sec * 1000000000.0f + clock_res.tv_nsec;
|
||||
|
||||
/* FIXME: this will probably require an in-depth review */
|
||||
VkPhysicalDeviceLimits limits = {
|
||||
.maxImageDimension1D = 4096,
|
||||
|
|
@ -923,8 +928,8 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
|
|||
.sampledImageStencilSampleCounts = supported_sample_counts,
|
||||
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
|
||||
.maxSampleMaskWords = 1,
|
||||
.timestampComputeAndGraphics = false,
|
||||
.timestampPeriod = 0.0f,
|
||||
.timestampComputeAndGraphics = true,
|
||||
.timestampPeriod = timestamp_period,
|
||||
.maxClipDistances = 8,
|
||||
.maxCullDistances = 0,
|
||||
.maxCombinedClipAndCullDistances = 8,
|
||||
|
|
@ -990,7 +995,7 @@ v3dv_queue_family_properties = {
|
|||
VK_QUEUE_COMPUTE_BIT |
|
||||
VK_QUEUE_TRANSFER_BIT,
|
||||
.queueCount = 1,
|
||||
.timestampValidBits = 0, /* FIXME */
|
||||
.timestampValidBits = 64,
|
||||
.minImageTransferGranularity = { 1, 1, 1 },
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -744,6 +744,7 @@ enum v3dv_job_type {
|
|||
V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS,
|
||||
V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
|
||||
V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
|
||||
V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
|
||||
};
|
||||
|
||||
struct v3dv_reset_query_cpu_job_info {
|
||||
|
|
@ -810,6 +811,11 @@ struct v3dv_csd_indirect_cpu_job_info {
|
|||
bool needs_wg_uniform_rewrite;
|
||||
};
|
||||
|
||||
/* Payload of a V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY job: identifies the query
 * slot that receives the timestamp when the job is processed on the CPU.
 */
struct v3dv_timestamp_query_cpu_job_info {
|
||||
/* Pool that owns the target query */
struct v3dv_query_pool *pool;
|
||||
/* Index of the target query within pool->queries */
uint32_t query;
|
||||
};
|
||||
|
||||
struct v3dv_job {
|
||||
struct list_head list_link;
|
||||
|
||||
|
|
@ -881,6 +887,7 @@ struct v3dv_job {
|
|||
struct v3dv_clear_attachments_cpu_job_info clear_attachments;
|
||||
struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
|
||||
struct v3dv_csd_indirect_cpu_job_info csd_indirect;
|
||||
struct v3dv_timestamp_query_cpu_job_info query_timestamp;
|
||||
} cpu;
|
||||
|
||||
/* Job specs for TFU jobs */
|
||||
|
|
@ -1084,10 +1091,14 @@ struct v3dv_resource {
|
|||
|
||||
struct v3dv_query {
|
||||
bool maybe_available;
|
||||
struct v3dv_bo *bo;
|
||||
union {
|
||||
struct v3dv_bo *bo; /* Used by GPU queries (occlusion) */
|
||||
uint64_t value; /* Used by CPU queries (timestamp) */
|
||||
};
|
||||
};
|
||||
|
||||
struct v3dv_query_pool {
|
||||
VkQueryType query_type;
|
||||
uint32_t query_count;
|
||||
struct v3dv_query *queries;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -31,12 +31,12 @@ v3dv_CreateQueryPool(VkDevice _device,
|
|||
{
|
||||
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
||||
|
||||
assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION);
|
||||
assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION ||
|
||||
pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP);
|
||||
assert(pCreateInfo->queryCount > 0);
|
||||
|
||||
|
||||
/* FIXME: the hw allows us to allocate up to 16 queries in a single block
|
||||
* so we should try to use that.
|
||||
* for occlusion queries so we should try to use that.
|
||||
*/
|
||||
struct v3dv_query_pool *pool =
|
||||
vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
|
||||
|
|
@ -44,6 +44,7 @@ v3dv_CreateQueryPool(VkDevice _device,
|
|||
if (pool == NULL)
|
||||
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
pool->query_type = pCreateInfo->queryType;
|
||||
pool->query_count = pCreateInfo->queryCount;
|
||||
|
||||
VkResult result;
|
||||
|
|
@ -59,16 +60,24 @@ v3dv_CreateQueryPool(VkDevice _device,
|
|||
uint32_t i;
|
||||
for (i = 0; i < pool->query_count; i++) {
|
||||
pool->queries[i].maybe_available = false;
|
||||
pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true);
|
||||
if (!pool->queries[i].bo) {
|
||||
result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
goto fail_alloc_bo;
|
||||
}
|
||||
|
||||
/* For occlusion queries we only need a 4-byte counter */
|
||||
if (!v3dv_bo_map(device, pool->queries[i].bo, 4)) {
|
||||
result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
goto fail_alloc_bo;
|
||||
switch (pool->query_type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true);
|
||||
if (!pool->queries[i].bo) {
|
||||
result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
goto fail_alloc_bo;
|
||||
}
|
||||
/* For occlusion queries we only need a 4-byte counter */
|
||||
if (!v3dv_bo_map(device, pool->queries[i].bo, 4)) {
|
||||
result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
goto fail_alloc_bo;
|
||||
}
|
||||
break;
|
||||
case VK_QUERY_TYPE_TIMESTAMP:
|
||||
pool->queries[i].value = 0;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unsupported query type");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -98,21 +107,105 @@ v3dv_DestroyQueryPool(VkDevice _device,
|
|||
if (!pool)
|
||||
return;
|
||||
|
||||
for (uint32_t i = 0; i < pool->query_count; i++)
|
||||
v3dv_bo_free(device, pool->queries[i].bo);
|
||||
if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
|
||||
for (uint32_t i = 0; i < pool->query_count; i++)
|
||||
v3dv_bo_free(device, pool->queries[i].bo);
|
||||
}
|
||||
|
||||
vk_free2(&device->alloc, pAllocator, pool->queries);
|
||||
vk_free2(&device->alloc, pAllocator, pool);
|
||||
}
|
||||
|
||||
static void
|
||||
write_query_result(void *dst, uint32_t idx, bool do_64bit, uint32_t value)
|
||||
write_query_result(void *dst, uint32_t idx, bool do_64bit, uint64_t value)
|
||||
{
|
||||
if (do_64bit) {
|
||||
uint64_t *dst64 = (uint64_t *) dst;
|
||||
dst64[idx] = value;
|
||||
} else {
|
||||
uint32_t *dst32 = (uint32_t *) dst;
|
||||
dst32[idx] = value;
|
||||
dst32[idx] = (uint32_t) value;
|
||||
}
|
||||
}
|
||||
|
||||
/* Reads the 32-bit counter of an occlusion query from its mapped BO and
 * returns it widened to 64 bits.
 *
 * With do_wait set, the query must already be flagged maybe_available and the
 * BO wait must succeed; *available is then set to true. Without do_wait,
 * *available is true only if the query is flagged maybe_available and a
 * v3dv_bo_wait() call with 0 as its last argument (presumably a zero timeout)
 * succeeds. In that case the counter is returned whether or not it is
 * available.
 *
 * NOTE(review): both failure paths return vk_error(...) through the uint64_t
 * return type and leave *available unwritten, so the caller cannot tell an
 * error code from a counter value. Consider returning VkResult and passing
 * the value out through a pointer.
 */
static uint64_t
|
||||
get_occlusion_query_result(struct v3dv_device *device,
|
||||
struct v3dv_query_pool *pool,
|
||||
uint32_t query,
|
||||
bool do_wait,
|
||||
bool *available)
|
||||
{
|
||||
assert(pool && pool->query_type == VK_QUERY_TYPE_OCCLUSION);
|
||||
|
||||
struct v3dv_query *q = &pool->queries[query];
|
||||
assert(q->bo && q->bo->map);
|
||||
|
||||
if (do_wait) {
|
||||
/* From the Vulkan 1.0 spec:
|
||||
*
|
||||
* "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
|
||||
* become available in a finite amount of time (e.g. due to not
|
||||
* issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
|
||||
* error may occur."
|
||||
*/
|
||||
if (!q->maybe_available)
|
||||
return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
|
||||
|
||||
/* Block on the query BO with the maximum 64-bit value as the last argument */
if (!v3dv_bo_wait(device, q->bo, 0xffffffffffffffffull))
|
||||
return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
|
||||
|
||||
*available = true;
|
||||
} else {
|
||||
*available = q->maybe_available && v3dv_bo_wait(device, q->bo, 0);
|
||||
}
|
||||
|
||||
/* The counter is read as a 32-bit value at the start of the BO mapping */
return (uint64_t) *((uint32_t *) q->bo->map);
|
||||
}
|
||||
|
||||
/* Returns the value stored in a timestamp query.
 *
 * No BO wait is involved here: availability is derived only from the query's
 * maybe_available flag. With do_wait set, a query that is not flagged
 * maybe_available is treated as an error; otherwise *available is set to
 * true. Without do_wait, *available mirrors the flag and the stored value is
 * returned either way.
 *
 * NOTE(review): the failure path returns vk_error(...) through the uint64_t
 * return type and leaves *available unwritten, so the caller cannot tell an
 * error code from a timestamp value.
 */
static uint64_t
|
||||
get_timestamp_query_result(struct v3dv_device *device,
|
||||
struct v3dv_query_pool *pool,
|
||||
uint32_t query,
|
||||
bool do_wait,
|
||||
bool *available)
|
||||
{
|
||||
assert(pool && pool->query_type == VK_QUERY_TYPE_TIMESTAMP);
|
||||
|
||||
struct v3dv_query *q = &pool->queries[query];
|
||||
|
||||
if (do_wait) {
|
||||
/* From the Vulkan 1.0 spec:
|
||||
*
|
||||
* "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
|
||||
* become available in a finite amount of time (e.g. due to not
|
||||
* issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
|
||||
* error may occur."
|
||||
*/
|
||||
if (!q->maybe_available)
|
||||
return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
|
||||
|
||||
*available = true;
|
||||
} else {
|
||||
*available = q->maybe_available;
|
||||
}
|
||||
|
||||
return q->value;
|
||||
}
|
||||
|
||||
/* Dispatches to the result reader that matches the pool's query type
 * (occlusion or timestamp), forwarding all arguments unchanged and returning
 * whatever that reader returns. Any other query type hits unreachable().
 */
static uint64_t
|
||||
get_query_result(struct v3dv_device *device,
|
||||
struct v3dv_query_pool *pool,
|
||||
uint32_t query,
|
||||
bool do_wait,
|
||||
bool *available)
|
||||
{
|
||||
switch (pool->query_type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
return get_occlusion_query_result(device, pool, query, do_wait, available);
|
||||
case VK_QUERY_TYPE_TIMESTAMP:
|
||||
return get_timestamp_query_result(device, pool, query, do_wait, available);
|
||||
default:
|
||||
unreachable("Unsupported query type");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -135,30 +228,8 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
|
|||
|
||||
VkResult result = VK_SUCCESS;
|
||||
for (uint32_t i = first; i < first + count; i++) {
|
||||
assert(pool->queries[i].bo && pool->queries[i].bo->map);
|
||||
struct v3dv_bo *bo = pool->queries[i].bo;
|
||||
const uint32_t *counter = (const uint32_t *) bo->map;
|
||||
|
||||
bool available;
|
||||
if (do_wait) {
|
||||
/* From the Vulkan 1.0 spec:
|
||||
*
|
||||
* "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
|
||||
* become available in a finite amount of time (e.g. due to not
|
||||
* issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
|
||||
* error may occur."
|
||||
*/
|
||||
if (!pool->queries[i].maybe_available)
|
||||
return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
|
||||
|
||||
if (!v3dv_bo_wait(device, bo, 0xffffffffffffffffull))
|
||||
return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
|
||||
|
||||
available = true;
|
||||
} else {
|
||||
available = pool->queries[i].maybe_available &&
|
||||
v3dv_bo_wait(device, bo, 0);
|
||||
}
|
||||
uint64_t value = get_query_result(device, pool, i, do_wait, &available);
|
||||
|
||||
/**
|
||||
* From the Vulkan 1.0 spec:
|
||||
|
|
@ -174,7 +245,7 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
|
|||
|
||||
const bool write_result = available || do_partial;
|
||||
if (write_result)
|
||||
write_query_result(data, slot, do_64bit, *counter);
|
||||
write_query_result(data, slot, do_64bit, value);
|
||||
slot++;
|
||||
|
||||
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
|
||||
|
|
|
|||
|
|
@ -154,22 +154,37 @@ static VkResult
|
|||
handle_reset_query_cpu_job(struct v3dv_job *job)
|
||||
{
|
||||
/* We are about to reset query counters so we need to make sure that
|
||||
* The GPU is not using them.
|
||||
* the GPU is not using them. The exception is timestamp queries, since
|
||||
* we handle those in the CPU.
|
||||
*
|
||||
* FIXME: we could avoid blocking the main thread for this if we use
|
||||
* a submission thread.
|
||||
*/
|
||||
VkResult result = gpu_queue_wait_idle(&job->device->queue);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset;
|
||||
assert(info->pool);
|
||||
|
||||
if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
|
||||
VkResult result = gpu_queue_wait_idle(&job->device->queue);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
}
|
||||
|
||||
for (uint32_t i = info->first; i < info->first + info->count; i++) {
|
||||
assert(i < info->pool->query_count);
|
||||
struct v3dv_query *query = &info->pool->queries[i];
|
||||
query->maybe_available = false;
|
||||
uint32_t *counter = (uint32_t *) query->bo->map;
|
||||
*counter = 0;
|
||||
switch (info->pool->query_type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION: {
|
||||
uint32_t *counter = (uint32_t *) query->bo->map;
|
||||
*counter = 0;
|
||||
break;
|
||||
}
|
||||
case VK_QUERY_TYPE_TIMESTAMP:
|
||||
query->value = 0;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unsupported query type");
|
||||
}
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
|
@ -419,6 +434,26 @@ handle_copy_buffer_to_image_cpu_job(struct v3dv_job *job)
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/* Executes a CPU timestamp-query job: waits for the device queue to go idle,
 * samples CLOCK_MONOTONIC, stores the time in the target query and flags the
 * query as maybe_available.
 *
 * Always returns VK_SUCCESS.
 *
 * NOTE(review): the results of v3dv_QueueWaitIdle() and clock_gettime() are
 * ignored. If either fails, the stored value may not reflect completion of
 * prior work, or may be computed from an uninitialized timespec.
 */
static VkResult
|
||||
handle_timestamp_query_cpu_job(struct v3dv_job *job)
|
||||
{
|
||||
assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY);
|
||||
struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp;
|
||||
|
||||
/* Wait for completion of all work queued before the timestamp query */
|
||||
v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));
|
||||
|
||||
/* Compute timestamp */
|
||||
struct timespec t;
|
||||
clock_gettime(CLOCK_MONOTONIC, &t);
|
||||
assert(info->query < info->pool->query_count);
|
||||
struct v3dv_query *query = &info->pool->queries[info->query];
|
||||
query->maybe_available = true;
|
||||
/* Seconds scaled to nanoseconds plus the nanosecond remainder */
query->value = t.tv_sec * 1000000000ull + t.tv_nsec;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
handle_csd_job(struct v3dv_queue *queue,
|
||||
struct v3dv_job *job,
|
||||
|
|
@ -705,6 +740,8 @@ queue_submit_job(struct v3dv_queue *queue,
|
|||
return handle_copy_buffer_to_image_cpu_job(job);
|
||||
case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
|
||||
return handle_csd_indirect_cpu_job(queue, job, do_sem_wait);
|
||||
case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY:
|
||||
return handle_timestamp_query_cpu_job(job);
|
||||
default:
|
||||
unreachable("Unhandled job type");
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue