v3dv: add support for timestamp queries

V3D doesn't provide any means to acquire timestamps from the GPU,
so we have to implement these on the CPU.

v2: enable timestampComputeAndGraphics and set timestampPeriod (Piñeiro)

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7373>
This commit is contained in:
Iago Toral Quiroga 2020-10-29 11:55:23 +01:00
parent 520f3e27b5
commit 12f87b6e7c
5 changed files with 200 additions and 53 deletions

View file

@ -5021,7 +5021,30 @@ v3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t query)
{
unreachable("Timestamp queries are not supported.");
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_query_pool, query_pool, queryPool);
/* If this is called inside a render pass we need to finish the current
* job here...
*/
if (cmd_buffer->state.pass)
v3dv_cmd_buffer_finish_job(cmd_buffer);
struct v3dv_job *job =
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
cmd_buffer, -1);
v3dv_return_if_oom(cmd_buffer, NULL);
job->cpu.query_timestamp.pool = query_pool;
job->cpu.query_timestamp.query = query;
list_addtail(&job->list_link, &cmd_buffer->jobs);
cmd_buffer->state.job = NULL;
/* ...and resume the subpass after the timestamp */
if (cmd_buffer->state.pass)
v3dv_cmd_buffer_subpass_resume(cmd_buffer, cmd_buffer->state.subpass_idx);
}
static void

View file

@ -814,6 +814,11 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
const VkSampleCountFlags supported_sample_counts =
VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
struct timespec clock_res;
clock_getres(CLOCK_MONOTONIC, &clock_res);
const float timestamp_period =
clock_res.tv_sec * 1000000000.0f + clock_res.tv_nsec;
/* FIXME: this will probably require an in-depth review */
VkPhysicalDeviceLimits limits = {
.maxImageDimension1D = 4096,
@ -923,8 +928,8 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
.sampledImageStencilSampleCounts = supported_sample_counts,
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.maxSampleMaskWords = 1,
.timestampComputeAndGraphics = false,
.timestampPeriod = 0.0f,
.timestampComputeAndGraphics = true,
.timestampPeriod = timestamp_period,
.maxClipDistances = 8,
.maxCullDistances = 0,
.maxCombinedClipAndCullDistances = 8,
@ -990,7 +995,7 @@ v3dv_queue_family_properties = {
VK_QUEUE_COMPUTE_BIT |
VK_QUEUE_TRANSFER_BIT,
.queueCount = 1,
.timestampValidBits = 0, /* FIXME */
.timestampValidBits = 64,
.minImageTransferGranularity = { 1, 1, 1 },
};

View file

@ -744,6 +744,7 @@ enum v3dv_job_type {
V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS,
V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
};
struct v3dv_reset_query_cpu_job_info {
@ -810,6 +811,11 @@ struct v3dv_csd_indirect_cpu_job_info {
bool needs_wg_uniform_rewrite;
};
/* Payload of a V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY job: identifies the query
 * slot that receives the CPU-side timestamp when the job is processed.
 */
struct v3dv_timestamp_query_cpu_job_info {
/* Query pool that owns the target query */
struct v3dv_query_pool *pool;
/* Index of the target query inside pool->queries */
uint32_t query;
};
struct v3dv_job {
struct list_head list_link;
@ -881,6 +887,7 @@ struct v3dv_job {
struct v3dv_clear_attachments_cpu_job_info clear_attachments;
struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
struct v3dv_csd_indirect_cpu_job_info csd_indirect;
struct v3dv_timestamp_query_cpu_job_info query_timestamp;
} cpu;
/* Job specs for TFU jobs */
@ -1084,10 +1091,14 @@ struct v3dv_resource {
struct v3dv_query {
bool maybe_available;
struct v3dv_bo *bo;
union {
struct v3dv_bo *bo; /* Used by GPU queries (occlusion) */
uint64_t value; /* Used by CPU queries (timestamp) */
};
};
/* A Vulkan query pool: a fixed-size array of queries that all share the
 * same query type.
 */
struct v3dv_query_pool {
/* Type of every query in the pool, copied from the create info */
VkQueryType query_type;
/* Number of entries in 'queries', copied from the create info */
uint32_t query_count;
/* Array of query_count per-query state records */
struct v3dv_query *queries;
};

View file

@ -31,12 +31,12 @@ v3dv_CreateQueryPool(VkDevice _device,
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION);
assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION ||
pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP);
assert(pCreateInfo->queryCount > 0);
/* FIXME: the hw allows us to allocate up to 16 queries in a single block
* so we should try to use that.
* for occlusion queries so we should try to use that.
*/
struct v3dv_query_pool *pool =
vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
@ -44,6 +44,7 @@ v3dv_CreateQueryPool(VkDevice _device,
if (pool == NULL)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
pool->query_type = pCreateInfo->queryType;
pool->query_count = pCreateInfo->queryCount;
VkResult result;
@ -59,16 +60,24 @@ v3dv_CreateQueryPool(VkDevice _device,
uint32_t i;
for (i = 0; i < pool->query_count; i++) {
pool->queries[i].maybe_available = false;
pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true);
if (!pool->queries[i].bo) {
result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail_alloc_bo;
}
/* For occlusion queries we only need a 4-byte counter */
if (!v3dv_bo_map(device, pool->queries[i].bo, 4)) {
result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail_alloc_bo;
switch (pool->query_type) {
case VK_QUERY_TYPE_OCCLUSION:
pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true);
if (!pool->queries[i].bo) {
result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail_alloc_bo;
}
/* For occlusion queries we only need a 4-byte counter */
if (!v3dv_bo_map(device, pool->queries[i].bo, 4)) {
result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail_alloc_bo;
}
break;
case VK_QUERY_TYPE_TIMESTAMP:
pool->queries[i].value = 0;
break;
default:
unreachable("Unsupported query type");
}
}
@ -98,21 +107,105 @@ v3dv_DestroyQueryPool(VkDevice _device,
if (!pool)
return;
for (uint32_t i = 0; i < pool->query_count; i++)
v3dv_bo_free(device, pool->queries[i].bo);
if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
for (uint32_t i = 0; i < pool->query_count; i++)
v3dv_bo_free(device, pool->queries[i].bo);
}
vk_free2(&device->alloc, pAllocator, pool->queries);
vk_free2(&device->alloc, pAllocator, pool);
}
static void
write_query_result(void *dst, uint32_t idx, bool do_64bit, uint32_t value)
write_query_result(void *dst, uint32_t idx, bool do_64bit, uint64_t value)
{
if (do_64bit) {
uint64_t *dst64 = (uint64_t *) dst;
dst64[idx] = value;
} else {
uint32_t *dst32 = (uint32_t *) dst;
dst32[idx] = value;
dst32[idx] = (uint32_t) value;
}
}
/**
 * Reads the current counter of an occlusion query.
 *
 * @param device     Device that owns the query BO.
 * @param pool       Occlusion query pool.
 * @param query      Index of the query inside the pool.
 * @param do_wait    Whether to block until the query BO is idle
 *                   (VK_QUERY_RESULT_WAIT_BIT semantics).
 * @param available  Always written: true if the result is known to be
 *                   final, false otherwise.
 *
 * @return The 32-bit counter stored in the query BO, widened to 64 bits,
 *         or 0 if waiting was requested but the result can never (or
 *         failed to) become available.
 *
 * The return channel carries a query value, not a VkResult, so a failed
 * wait cannot be reported as an error code from here. It is logged through
 * vk_error() and signalled to the caller as '*available == false'.
 */
static uint64_t
get_occlusion_query_result(struct v3dv_device *device,
                           struct v3dv_query_pool *pool,
                           uint32_t query,
                           bool do_wait,
                           bool *available)
{
   assert(pool && pool->query_type == VK_QUERY_TYPE_OCCLUSION);

   struct v3dv_query *q = &pool->queries[query];
   assert(q->bo && q->bo->map);

   if (do_wait) {
      /* From the Vulkan 1.0 spec:
       *
       *    "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
       *     become available in a finite amount of time (e.g. due to not
       *     issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
       *     error may occur."
       */
      if (!q->maybe_available ||
          !v3dv_bo_wait(device, q->bo, 0xffffffffffffffffull)) {
         /* Never hand the error code back as if it were a query value, and
          * never leave the availability flag unwritten.
          */
         (void) vk_error(device->instance, VK_ERROR_DEVICE_LOST);
         *available = false;
         return 0;
      }

      *available = true;
   } else {
      /* Non-blocking poll: a zero timeout only checks whether the BO is idle */
      *available = q->maybe_available && v3dv_bo_wait(device, q->bo, 0);
   }

   /* Occlusion counters are 32-bit values at the start of the BO mapping */
   return (uint64_t) *((uint32_t *) q->bo->map);
}
/**
 * Reads the value of a timestamp query.
 *
 * The value is read straight from the query record; no BO is involved and
 * nothing is waited on.
 *
 * @param device     Device, used only for error reporting.
 * @param pool       Timestamp query pool.
 * @param query      Index of the query inside the pool.
 * @param do_wait    Whether the caller asked for VK_QUERY_RESULT_WAIT_BIT
 *                   semantics.
 * @param available  Always written: true if a timestamp has been recorded
 *                   for this query, false otherwise.
 *
 * @return The recorded timestamp, or 0 if waiting was requested for a query
 *         that has no timestamp recorded.
 *
 * The return channel carries a query value, not a VkResult, so the
 * "will never become available" condition cannot be reported as an error
 * code from here. It is logged through vk_error() and signalled to the
 * caller as '*available == false'.
 */
static uint64_t
get_timestamp_query_result(struct v3dv_device *device,
                           struct v3dv_query_pool *pool,
                           uint32_t query,
                           bool do_wait,
                           bool *available)
{
   assert(pool && pool->query_type == VK_QUERY_TYPE_TIMESTAMP);

   struct v3dv_query *q = &pool->queries[query];

   if (do_wait && !q->maybe_available) {
      /* From the Vulkan 1.0 spec:
       *
       *    "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
       *     become available in a finite amount of time (e.g. due to not
       *     issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
       *     error may occur."
       *
       * Never hand the error code back as if it were a timestamp, and never
       * leave the availability flag unwritten.
       */
      (void) vk_error(device->instance, VK_ERROR_DEVICE_LOST);
      *available = false;
      return 0;
   }

   *available = q->maybe_available;
   return q->value;
}
static uint64_t
get_query_result(struct v3dv_device *device,
struct v3dv_query_pool *pool,
uint32_t query,
bool do_wait,
bool *available)
{
switch (pool->query_type) {
case VK_QUERY_TYPE_OCCLUSION:
return get_occlusion_query_result(device, pool, query, do_wait, available);
case VK_QUERY_TYPE_TIMESTAMP:
return get_timestamp_query_result(device, pool, query, do_wait, available);
default:
unreachable("Unsupported query type");
}
}
@ -135,30 +228,8 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
VkResult result = VK_SUCCESS;
for (uint32_t i = first; i < first + count; i++) {
assert(pool->queries[i].bo && pool->queries[i].bo->map);
struct v3dv_bo *bo = pool->queries[i].bo;
const uint32_t *counter = (const uint32_t *) bo->map;
bool available;
if (do_wait) {
/* From the Vulkan 1.0 spec:
*
* "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
* become available in a finite amount of time (e.g. due to not
* issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
* error may occur."
*/
if (!pool->queries[i].maybe_available)
return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
if (!v3dv_bo_wait(device, bo, 0xffffffffffffffffull))
return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
available = true;
} else {
available = pool->queries[i].maybe_available &&
v3dv_bo_wait(device, bo, 0);
}
uint64_t value = get_query_result(device, pool, i, do_wait, &available);
/**
* From the Vulkan 1.0 spec:
@ -174,7 +245,7 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
const bool write_result = available || do_partial;
if (write_result)
write_query_result(data, slot, do_64bit, *counter);
write_query_result(data, slot, do_64bit, value);
slot++;
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)

View file

@ -154,22 +154,37 @@ static VkResult
handle_reset_query_cpu_job(struct v3dv_job *job)
{
/* We are about to reset query counters so we need to make sure that
* The GPU is not using them.
* The GPU is not using them. The exception is timestamp queries, since
* we handle those in the CPU.
*
* FIXME: we could avoid blocking the main thread for this if we use
* submission thread.
*/
VkResult result = gpu_queue_wait_idle(&job->device->queue);
if (result != VK_SUCCESS)
return result;
struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset;
assert(info->pool);
if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
VkResult result = gpu_queue_wait_idle(&job->device->queue);
if (result != VK_SUCCESS)
return result;
}
for (uint32_t i = info->first; i < info->first + info->count; i++) {
assert(i < info->pool->query_count);
struct v3dv_query *query = &info->pool->queries[i];
query->maybe_available = false;
uint32_t *counter = (uint32_t *) query->bo->map;
*counter = 0;
switch (info->pool->query_type) {
case VK_QUERY_TYPE_OCCLUSION: {
uint32_t *counter = (uint32_t *) query->bo->map;
*counter = 0;
break;
}
case VK_QUERY_TYPE_TIMESTAMP:
query->value = 0;
break;
default:
unreachable("Unsupported query type");
}
}
return VK_SUCCESS;
@ -419,6 +434,26 @@ handle_copy_buffer_to_image_cpu_job(struct v3dv_job *job)
return VK_SUCCESS;
}
/**
 * Processes a V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY job.
 *
 * Waits for the queue to go idle, samples CLOCK_MONOTONIC and stores the
 * result, in nanoseconds, in the target query, marking it as available.
 *
 * @param job  CPU job whose 'cpu.query_timestamp' payload names the target
 *             pool and query index.
 *
 * @return VK_SUCCESS once the timestamp has been recorded, or the error
 *         reported by the queue wait, in which case the query is left
 *         untouched.
 */
static VkResult
handle_timestamp_query_cpu_job(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY);
   struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp;

   /* Wait for completion of all work queued before the timestamp query. If
    * the wait fails, do not record a timestamp or flag the query as
    * available; report the failure to the submitter instead.
    */
   VkResult result =
      v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));
   if (result != VK_SUCCESS)
      return result;

   /* Compute timestamp */
   struct timespec t;
   clock_gettime(CLOCK_MONOTONIC, &t);

   assert(info->query < info->pool->query_count);
   struct v3dv_query *query = &info->pool->queries[info->query];
   query->maybe_available = true;
   /* Seconds and nanoseconds folded into a single nanosecond count */
   query->value = t.tv_sec * 1000000000ull + t.tv_nsec;

   return VK_SUCCESS;
}
static VkResult
handle_csd_job(struct v3dv_queue *queue,
struct v3dv_job *job,
@ -705,6 +740,8 @@ queue_submit_job(struct v3dv_queue *queue,
return handle_copy_buffer_to_image_cpu_job(job);
case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
return handle_csd_indirect_cpu_job(queue, job, do_sem_wait);
case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY:
return handle_timestamp_query_cpu_job(job);
default:
unreachable("Unhandled job type");
}