diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c index 6a4e4476431..b5724d2ecc1 100644 --- a/src/broadcom/vulkan/v3dv_cmd_buffer.c +++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c @@ -3982,9 +3982,10 @@ emit_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer) v3dv_return_if_oom(cmd_buffer, NULL); cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) { - if (cmd_buffer->state.query.active_query) { + if (cmd_buffer->state.query.active_query.bo) { counter.address = - v3dv_cl_address(cmd_buffer->state.query.active_query, 0); + v3dv_cl_address(cmd_buffer->state.query.active_query.bo, + cmd_buffer->state.query.active_query.offset); } } @@ -4927,10 +4928,11 @@ v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer, VkQueryControlFlags flags) { /* FIXME: we only support one active query for now */ - assert(cmd_buffer->state.query.active_query == NULL); + assert(cmd_buffer->state.query.active_query.bo == NULL); assert(query < pool->query_count); - cmd_buffer->state.query.active_query = pool->queries[query].bo; + cmd_buffer->state.query.active_query.bo = pool->queries[query].bo; + cmd_buffer->state.query.active_query.offset = pool->queries[query].offset; cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_OCCLUSION_QUERY; } @@ -4940,7 +4942,7 @@ v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer, uint32_t query) { assert(query < pool->query_count); - assert(cmd_buffer->state.query.active_query != NULL); + assert(cmd_buffer->state.query.active_query.bo != NULL); if (cmd_buffer->state.pass) { /* Queue the EndQuery in the command buffer state, we will create a CPU @@ -4973,7 +4975,7 @@ v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer, list_addtail(&job->list_link, &cmd_buffer->jobs); } - cmd_buffer->state.query.active_query = NULL; + cmd_buffer->state.query.active_query.bo = NULL; cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_OCCLUSION_QUERY; } diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h index 8a9943bdf32..91c97d4a5da 100644 --- a/src/broadcom/vulkan/v3dv_private.h +++ b/src/broadcom/vulkan/v3dv_private.h @@ -1141,10 +1141,13 @@ struct v3dv_cmd_buffer_state { struct v3dv_end_query_cpu_job_info *states; } end; - /* This is not NULL if we have an active query, that is, we have called - * vkCmdBeginQuery but not vkCmdEndQuery. + /* This BO is not NULL if we have an active query, that is, we have + * called vkCmdBeginQuery but not vkCmdEndQuery. */ - struct v3dv_bo *active_query; + struct { + struct v3dv_bo *bo; + uint32_t offset; + } active_query; } query; }; @@ -1205,14 +1208,21 @@ struct v3dv_combined_image_sampler_descriptor { struct v3dv_query { bool maybe_available; union { - struct v3dv_bo *bo; /* Used by GPU queries (occlusion) */ - uint64_t value; /* Used by CPU queries (timestamp) */ + /* Used by GPU queries (occlusion) */ + struct { + struct v3dv_bo *bo; + uint32_t offset; + }; + /* Used by CPU queries (timestamp) */ + uint64_t value; }; }; struct v3dv_query_pool { struct vk_object_base base; + struct v3dv_bo *bo; /* Only used with GPU queries (occlusion) */ + VkQueryType query_type; uint32_t query_count; struct v3dv_query *queries; diff --git a/src/broadcom/vulkan/v3dv_query.c b/src/broadcom/vulkan/v3dv_query.c index d3100498cbe..94ea46ee5d9 100644 --- a/src/broadcom/vulkan/v3dv_query.c +++ b/src/broadcom/vulkan/v3dv_query.c @@ -35,9 +35,6 @@ v3dv_CreateQueryPool(VkDevice _device, pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP); assert(pCreateInfo->queryCount > 0); - /* FIXME: the hw allows us to allocate up to 16 queries in a single block - * for occlussion queries so we should try to use that. - */ struct v3dv_query_pool *pool = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pool), VK_OBJECT_TYPE_QUERY_POOL); @@ -54,25 +51,38 @@ v3dv_CreateQueryPool(VkDevice _device, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pool->queries == NULL) { result = vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_alloc_bo_list; + goto fail; + } + + if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) { + /* The hardware allows us to setup groups of 16 queries in consecutive + * 4-byte addresses, requiring only that each group of 16 queries is + * aligned to a 1024 byte boundary. + */ + const uint32_t query_groups = DIV_ROUND_UP(pool->query_count, 16); + const uint32_t bo_size = query_groups * 1024; + pool->bo = v3dv_bo_alloc(device, bo_size, "query", true); + if (!pool->bo) { + result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail; + } + if (!v3dv_bo_map(device, pool->bo, bo_size)) { + result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail; + } } uint32_t i; for (i = 0; i < pool->query_count; i++) { pool->queries[i].maybe_available = false; switch (pool->query_type) { - case VK_QUERY_TYPE_OCCLUSION: - pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true); - if (!pool->queries[i].bo) { - result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); - goto fail_alloc_bo; - } - /* For occlusion queries we only need a 4-byte counter */ - if (!v3dv_bo_map(device, pool->queries[i].bo, 4)) { - result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); - goto fail_alloc_bo; - } + case VK_QUERY_TYPE_OCCLUSION: { + const uint32_t query_group = i / 16; + const uint32_t query_offset = query_group * 1024 + (i % 16) * 4; + pool->queries[i].bo = pool->bo; + pool->queries[i].offset = query_offset; break; + } case VK_QUERY_TYPE_TIMESTAMP: pool->queries[i].value = 0; break; @@ -85,12 +95,11 @@ v3dv_CreateQueryPool(VkDevice _device, return VK_SUCCESS; -fail_alloc_bo: - for (uint32_t j = 0; j < i; j++) - v3dv_bo_free(device, pool->queries[j].bo); - vk_free2(&device->vk.alloc, pAllocator, pool->queries); - -fail_alloc_bo_list: +fail: + if (pool->bo) + v3dv_bo_free(device, pool->bo); + if (pool->queries) + vk_free2(&device->vk.alloc, pAllocator, pool->queries); vk_object_free(&device->vk, pAllocator, pool); return result; @@ -107,12 +116,12 @@ v3dv_DestroyQueryPool(VkDevice _device, if (!pool) return; - if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) { - for (uint32_t i = 0; i < pool->query_count; i++) - v3dv_bo_free(device, pool->queries[i].bo); - } + if (pool->bo) + v3dv_bo_free(device, pool->bo); + + if (pool->queries) + vk_free2(&device->vk.alloc, pAllocator, pool->queries); - vk_free2(&device->vk.alloc, pAllocator, pool->queries); vk_object_free(&device->vk, pAllocator, pool); } @@ -159,7 +168,8 @@ get_occlusion_query_result(struct v3dv_device *device, *available = q->maybe_available && v3dv_bo_wait(device, q->bo, 0); } - return (uint64_t) *((uint32_t *) q->bo->map); + const uint8_t *query_addr = ((uint8_t *) q->bo->map) + q->offset; + return (uint64_t) *((uint32_t *)query_addr); } static uint64_t diff --git a/src/broadcom/vulkan/v3dv_queue.c b/src/broadcom/vulkan/v3dv_queue.c index 6ea6d1acff8..5698981673a 100644 --- a/src/broadcom/vulkan/v3dv_queue.c +++ b/src/broadcom/vulkan/v3dv_queue.c @@ -163,19 +163,22 @@ handle_reset_query_cpu_job(struct v3dv_job *job) * FIXME: we could avoid blocking the main thread for this if we use * submission thread. */ + if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION) + v3dv_bo_wait(job->device, info->pool->bo, PIPE_TIMEOUT_INFINITE); + for (uint32_t i = info->first; i < info->first + info->count; i++) { assert(i < info->pool->query_count); - struct v3dv_query *query = &info->pool->queries[i]; - query->maybe_available = false; + struct v3dv_query *q = &info->pool->queries[i]; + q->maybe_available = false; switch (info->pool->query_type) { case VK_QUERY_TYPE_OCCLUSION: { - v3dv_bo_wait(job->device, query->bo, PIPE_TIMEOUT_INFINITE); - uint32_t *counter = (uint32_t *) query->bo->map; + const uint8_t *q_addr = ((uint8_t *) q->bo->map) + q->offset; + uint32_t *counter = (uint32_t *) q_addr; *counter = 0; break; } case VK_QUERY_TYPE_TIMESTAMP: - query->value = 0; + q->value = 0; break; default: unreachable("Unsupported query type");