v3dv: switch timestamp queries to using BO memory

Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Maíra Canal <mcanal@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26448>
Author: Iago Toral Quiroga <itoral@igalia.com>
Date: 2023-07-11 09:21:56 +02:00 (committed by Marge Bot)
Parent: e404ccba5b
Commit: 2a0d196abf
3 changed files with 118 additions and 28 deletions

src/broadcom/vulkan/v3dv_private.h

@@ -1650,8 +1650,14 @@ struct v3dv_query {
       uint32_t offset;
    } occlusion;
 
-   /* Used by CPU queries (timestamp) */
-   uint64_t value;
+   /* Used by timestamp queries */
+   struct {
+      /* Offset of this query in the timestamp BO for its value */
+      uint32_t offset;
+      /* Syncobj to signal timestamp query availability */
+      struct vk_sync *sync;
+   } timestamp;
 
    /* Used by performance queries */
    struct v3dv_perf_query perf;

@@ -1684,6 +1690,12 @@ struct v3dv_query_pool {
       uint32_t avail_offset;
    } occlusion;
 
+   /* Only used with timestamp queries */
+   struct {
+      /* BO with the query timestamp values */
+      struct v3dv_bo *bo;
+   } timestamp;
+
    /* Only used with performance queries */
    struct {
       uint32_t ncounters;
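The layout these structs describe is simple: every query owns an 8-byte slot in the pool's timestamp BO, and the per-query timestamp.offset indexes into that packed array. A minimal sketch of the addressing (illustrative helper, not part of the driver; bo_map stands in for pool->timestamp.bo->map):

   #include <stdint.h>

   /* Each query's 64-bit timestamp value lives at query_idx * 8 from
    * the start of the BO, matching the timestamp.offset computed at
    * pool creation time. */
   static inline uint64_t *
   timestamp_slot(void *bo_map, uint32_t query_idx)
   {
      return (uint64_t *)((uint8_t *)bo_map + query_idx * 8);
   }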

src/broadcom/vulkan/v3dv_query.c

@@ -313,8 +313,22 @@ v3dv_CreateQueryPool(VkDevice _device,
       assert(pool->perfmon.nperfmons <= V3DV_MAX_PERFMONS);
       break;
    }
-   case VK_QUERY_TYPE_TIMESTAMP:
+   case VK_QUERY_TYPE_TIMESTAMP: {
+      /* 8 bytes per query used for the timestamp value. We have all
+       * timestamps tightly packed first in the buffer.
+       */
+      const uint32_t bo_size = pool->query_count * 8;
+      pool->timestamp.bo = v3dv_bo_alloc(device, bo_size, "query:t", true);
+      if (!pool->timestamp.bo) {
+         result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+         goto fail;
+      }
+      if (!v3dv_bo_map(device, pool->timestamp.bo, bo_size)) {
+         result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+         goto fail;
+      }
       break;
+   }
    default:
       unreachable("Unsupported query type");
    }
@@ -330,7 +344,12 @@ v3dv_CreateQueryPool(VkDevice _device,
          break;
       }
       case VK_QUERY_TYPE_TIMESTAMP:
-         pool->queries[query_idx].value = 0;
+         pool->queries[query_idx].timestamp.offset = query_idx * 8;
+         result = vk_sync_create(&device->vk,
+                                 &device->pdevice->drm_syncobj_type, 0, 0,
+                                 &pool->queries[query_idx].timestamp.sync);
+         if (result != VK_SUCCESS)
+            goto fail;
          break;
       case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
          result = vk_sync_create(&device->vk,
@@ -358,6 +377,11 @@ v3dv_CreateQueryPool(VkDevice _device,
    return VK_SUCCESS;
 
 fail:
+   if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
+      for (uint32_t j = 0; j < query_idx; j++)
+         vk_sync_destroy(&device->vk, pool->queries[j].timestamp.sync);
+   }
+
    if (pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       for (uint32_t j = 0; j < query_idx; j++)
         vk_sync_destroy(&device->vk, pool->queries[j].perf.last_job_sync);
@@ -365,6 +389,8 @@ fail:
    if (pool->occlusion.bo)
       v3dv_bo_free(device, pool->occlusion.bo);
+   if (pool->timestamp.bo)
+      v3dv_bo_free(device, pool->timestamp.bo);
    if (pool->queries)
       vk_free2(&device->vk.alloc, pAllocator, pool->queries);
 
    pool_destroy_meta_resources(device, pool);
@@ -387,6 +413,14 @@ v3dv_DestroyQueryPool(VkDevice _device,
    if (pool->occlusion.bo)
       v3dv_bo_free(device, pool->occlusion.bo);
 
+   if (pool->timestamp.bo)
+      v3dv_bo_free(device, pool->timestamp.bo);
+
+   if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
+      for (uint32_t i = 0; i < pool->query_count; i++)
+         vk_sync_destroy(&device->vk, pool->queries[i].timestamp.sync);
+   }
+
    if (pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       for (uint32_t i = 0; i < pool->query_count; i++) {
          kperfmon_destroy(device, pool, i);
@@ -421,9 +455,9 @@ query_wait_available(struct v3dv_device *device,
                      uint32_t query_idx)
 {
    /* For occlusion queries we prefer to poll the availability BO in a loop
-    * to waiting on the occlusion query results BO, because the latter would
-    * make us wait for any job running occlusion queries, even if those queries
-    * do not involve the one we want to wait on.
+    * to waiting on the query results BO, because the latter would
+    * make us wait for any job running queries from the pool, even if those
+    * queries do not involve the one we want to wait on.
     */
    if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
       uint8_t *q_addr = ((uint8_t *) pool->occlusion.bo->map) +
@@ -433,12 +467,19 @@ query_wait_available(struct v3dv_device *device,
       return VK_SUCCESS;
    }
 
-   /* For other queries we need to wait for the queue to signal that
+   if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
+      if (vk_sync_wait(&device->vk, q->timestamp.sync,
+                       0, VK_SYNC_WAIT_COMPLETE, UINT64_MAX) != VK_SUCCESS) {
+         return vk_device_set_lost(&device->vk, "Query job wait failed");
+      }
+      return VK_SUCCESS;
+   }
+
+   assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);
+
+   /* For performance queries we need to wait for the queue to signal that
     * the query has been submitted for execution before anything else.
     */
-   assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR ||
-          pool->query_type == VK_QUERY_TYPE_TIMESTAMP);
    VkResult result = VK_SUCCESS;
    if (!q->maybe_available) {
       struct timespec timeout;
@@ -485,18 +526,28 @@ query_check_available(struct v3dv_device *device,
                       struct v3dv_query *q,
                       uint32_t query_idx)
 {
-   /* For occlusion and performance queries we check the availability BO */
+   /* For occlusion we check the availability BO */
    if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
       const uint8_t *q_addr = ((uint8_t *) pool->occlusion.bo->map) +
                               pool->occlusion.avail_offset + query_idx;
       return (*q_addr != 0) ? VK_SUCCESS : VK_NOT_READY;
    }
 
+   /* For timestamp queries, we need to check if the relevant job
+    * has completed.
+    */
+   if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
+      if (vk_sync_wait(&device->vk, q->timestamp.sync,
+                       0, VK_SYNC_WAIT_COMPLETE, 0) != VK_SUCCESS) {
+         return VK_NOT_READY;
+      }
+      return VK_SUCCESS;
+   }
+
    /* For other queries we need to check if the queue has submitted the query
     * for execution at all.
     */
-   assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR ||
-          pool->query_type == VK_QUERY_TYPE_TIMESTAMP);
+   assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);
 
    if (!q->maybe_available)
       return VK_NOT_READY;
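Both timestamp branches above lean on the same vk_sync_wait entry point from Mesa's common Vulkan runtime; only the timeout differs. query_wait_available passes UINT64_MAX to block until the syncobj signals, while query_check_available passes 0 so the call degenerates to a poll. A minimal sketch of the two modes (hypothetical wrappers, assuming the vk_sync runtime headers are available):

   #include <stdbool.h>
   #include <stdint.h>
   /* Assumes Mesa's common runtime: vulkan/runtime/vk_sync.h */

   /* Poll: a zero timeout returns immediately and succeeds only if
    * the syncobj is already signaled. */
   static bool
   sync_is_signaled(struct vk_device *dev, struct vk_sync *sync)
   {
      return vk_sync_wait(dev, sync, 0, VK_SYNC_WAIT_COMPLETE, 0) == VK_SUCCESS;
   }

   /* Block: an effectively infinite timeout waits for the signal. */
   static VkResult
   sync_block(struct vk_device *dev, struct vk_sync *sync)
   {
      return vk_sync_wait(dev, sync, 0, VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
   }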
@@ -521,9 +572,6 @@ query_is_available(struct v3dv_device *device,
 {
    struct v3dv_query *q = &pool->queries[query];
 
-   assert(pool->query_type != VK_QUERY_TYPE_OCCLUSION ||
-          (pool->occlusion.bo && pool->occlusion.bo->map));
-
    if (do_wait) {
       VkResult result = query_wait_available(device, pool, q, query);
       if (result != VK_SUCCESS) {
@@ -575,7 +623,10 @@ write_timestamp_query_result(struct v3dv_device *device,
    struct v3dv_query *q = &pool->queries[query];
 
-   write_to_buffer(data, slot, do_64bit, q->value);
+   const uint8_t *query_addr =
+      ((uint8_t *) pool->timestamp.bo->map) + q->timestamp.offset;
+
+   write_to_buffer(data, slot, do_64bit, *((uint64_t *) query_addr));
 
    return VK_SUCCESS;
 }
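write_to_buffer itself is untouched by this commit and not shown in the diff; a helper of that shape typically stores the value into the caller's results buffer at the requested slot, truncating to 32 bits unless the app asked for VK_QUERY_RESULT_64_BIT. A sketch under that assumption:

   #include <stdbool.h>
   #include <stdint.h>

   /* Sketch: store one query result into the user buffer at the given
    * slot, as 64-bit or truncated 32-bit depending on the request. */
   static void
   write_to_buffer(void *data, uint32_t slot, bool do_64bit, uint64_t value)
   {
      if (do_64bit) {
         uint64_t *data64 = data;
         data64[slot] = value;
      } else {
         uint32_t *data32 = data;
         data32[slot] = (uint32_t) value;
      }
   }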
@@ -1227,6 +1278,24 @@ v3dv_reset_query_pool_cpu(struct v3dv_device *device,
 {
    mtx_lock(&device->query_mutex);
 
+   if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
+      assert(first + count <= pool->query_count);
+
+      /* Reset timestamp */
+      uint8_t *base_addr;
+      base_addr = ((uint8_t *) pool->timestamp.bo->map) +
+                  pool->queries[first].timestamp.offset;
+      memset(base_addr, 0, 8 * count);
+
+      for (uint32_t i = first; i < first + count; i++) {
+         if (vk_sync_reset(&device->vk, pool->queries[i].timestamp.sync) != VK_SUCCESS)
+            fprintf(stderr, "Failed to reset sync");
+      }
+
+      mtx_unlock(&device->query_mutex);
+      return;
+   }
+
    for (uint32_t i = first; i < first + count; i++) {
       assert(i < pool->query_count);
       struct v3dv_query *q = &pool->queries[i];

@@ -1245,9 +1314,6 @@ v3dv_reset_query_pool_cpu(struct v3dv_device *device,
          *counter = 0;
          break;
       }
-      case VK_QUERY_TYPE_TIMESTAMP:
-         q->value = 0;
-         break;
       case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
          kperfmon_destroy(device, pool, i);
          kperfmon_create(device, pool, i);
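Because the values are tightly packed, resetting a contiguous query range collapses into a single memset starting at the first query's offset. A worked illustration with hypothetical values:

   #include <assert.h>
   #include <stdint.h>

   int
   main(void)
   {
      const uint32_t first = 4, count = 3;   /* hypothetical reset range */
      const uint32_t start = first * 8;      /* == queries[4].timestamp.offset */
      const uint32_t length = 8 * count;     /* bytes cleared by the memset */
      /* Clears BO bytes [32, 56): the 8-byte slots of queries 4, 5 and 6. */
      assert(start == 32 && length == 24);
      return 0;
   }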

src/broadcom/vulkan/v3dv_queue.c

@@ -316,12 +316,19 @@ handle_reset_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
    assert(info->pool);
 
    /* We are about to reset query counters so we need to make sure that
-    * The GPU is not using them. The exception is timestamp queries, since
-    * we handle those in the CPU.
+    * The GPU is not using them.
     */
    if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION)
       v3dv_bo_wait(job->device, info->pool->occlusion.bo, OS_TIMEOUT_INFINITE);
 
+   if (info->pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
+      VkResult result = queue_wait_idle(queue, sync_info);
+      if (result != VK_SUCCESS)
+         return result;
+      v3dv_bo_wait(job->device, info->pool->timestamp.bo, OS_TIMEOUT_INFINITE);
+   }
+
    if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       struct vk_sync_wait waits[info->count];
       unsigned wait_count = 0;
@@ -409,8 +416,7 @@ handle_end_query_cpu_job(struct v3dv_job *job, uint32_t counter_pass_idx)
    int err = 0;
    int fd = -1;
 
-   assert(info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR ||
-          info->pool->query_type == VK_QUERY_TYPE_TIMESTAMP);
+   assert(info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);
 
    if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       result = export_perfmon_last_job_sync(queue, job, &fd);
@@ -502,14 +508,20 @@ handle_timestamp_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
       assert(info->query + i < info->pool->query_count);
 
       struct v3dv_query *query = &info->pool->queries[info->query + i];
       query->maybe_available = true;
 
-      if (i == 0)
-         query->value = t.tv_sec * 1000000000ull + t.tv_nsec;
+      /* Value */
+      uint8_t *value_addr =
+         ((uint8_t *) info->pool->timestamp.bo->map) + query->timestamp.offset;
+      *((uint64_t *) value_addr) = (i == 0) ? t.tv_sec * 1000000000ull + t.tv_nsec : 0ull;
+
+      /* Availability */
+      result = vk_sync_signal(&job->device->vk, query->timestamp.sync, 0);
    }
 
    cnd_broadcast(&job->device->query_ended);
    mtx_unlock(&job->device->query_mutex);
 
-   return VK_SUCCESS;
+   return result;
 }
 
 static VkResult
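For context, t in the loop above is a CPU-side struct timespec sampled when this CPU job runs (the sampling itself is outside this hunk), and the stored value is its nanosecond folding t.tv_sec * 1000000000ull + t.tv_nsec. A self-contained sketch of that conversion, assuming a monotonic clock (the actual clock choice is not visible in this diff):

   #include <stdint.h>
   #include <time.h>

   /* Sample a CPU timestamp and pack it into the 64-bit nanosecond
    * format stored in the query's BO slot. */
   static uint64_t
   sample_timestamp_ns(void)
   {
      struct timespec t;
      clock_gettime(CLOCK_MONOTONIC, &t);
      return t.tv_sec * 1000000000ull + t.tv_nsec;
   }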