diff --git a/src/amd/vulkan/radv_cmd_buffer.h b/src/amd/vulkan/radv_cmd_buffer.h index 044aac9b20e..03c9bfe58d6 100644 --- a/src/amd/vulkan/radv_cmd_buffer.h +++ b/src/amd/vulkan/radv_cmd_buffer.h @@ -486,6 +486,8 @@ struct radv_cmd_state { bool uses_vrs_coarse_shading; bool uses_dynamic_patch_control_points; bool uses_fbfetch_output; + + uint64_t shader_query_buf_va; /* GFX12+: VA of the shader query buffer, non-zero once allocated. */ }; struct radv_enc_state { diff --git a/src/amd/vulkan/radv_constants.h b/src/amd/vulkan/radv_constants.h index ebad1580761..6398ce1ae29 100644 --- a/src/amd/vulkan/radv_constants.h +++ b/src/amd/vulkan/radv_constants.h @@ -151,6 +151,11 @@ #define RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET 60 #define RADV_SHADER_QUERY_TS_INVOCATION_OFFSET 64 +/* Size in bytes of the shader query buffer for generated/written primitive queries + * using SSBO atomics on GFX12 (from PRIM_GEN(0) to PRIM_XFB(3), plus 4 bytes). + */ +#define RADV_SHADER_QUERY_BUF_SIZE (RADV_SHADER_QUERY_PRIM_XFB_OFFSET(3) - RADV_SHADER_QUERY_PRIM_GEN_OFFSET(0) + 4) + /* Number of samples for line smooth lowering (hw requirement). 
*/ #define RADV_NUM_SMOOTH_AA_SAMPLES 4 diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 69eb8574947..b688be7e294 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -961,6 +961,28 @@ emit_sample_streamout(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t radeon_emit(cs, va >> 32); } +static void +radv_alloc_shader_query_buf(struct radv_cmd_buffer *cmd_buffer) +{ /* Allocate and zero the shader query buffer on first use; a no-op once the VA is set. */ + const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + const struct radv_physical_device *pdev = radv_device_physical(device); + unsigned offset; + void *ptr; + + assert(pdev->info.gfx_level >= GFX12); /* the caller only gets here for GFX12+ query pools */ + + if (cmd_buffer->state.shader_query_buf_va) + return; /* already allocated for this command buffer */ + + if (!radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, RADV_SHADER_QUERY_BUF_SIZE, 64, &offset, &ptr)) + return; /* allocation failed: the VA stays unset */ + + memset(ptr, 0, RADV_SHADER_QUERY_BUF_SIZE); /* start from a zeroed buffer */ + + cmd_buffer->state.shader_query_buf_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); + cmd_buffer->state.shader_query_buf_va += offset; /* upload BO base VA + offset of this allocation */ +} + static void radv_begin_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t index) { @@ -1953,6 +1975,10 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo * /* The number of task shader invocations needs to be queried on ACE. 
*/ pool->uses_ace = (pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT); + pool->uses_shader_query_buf = /* GFX12+ primitives-generated and transform-feedback stream queries only */ + pdev->info.gfx_level >= GFX12 && (pool->vk.query_type == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT || + pool->vk.query_type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT); + switch (pCreateInfo->queryType) { case VK_QUERY_TYPE_OCCLUSION: pool->stride = 16 * pdev->info.max_render_backends; @@ -2701,6 +2727,9 @@ radv_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPoo radv_cs_add_buffer(device->ws, cmd_buffer->gang.cs, pool->bo); } + if (pool->uses_shader_query_buf) /* allocate the buffer (if not done yet) before the query begins */ + radv_alloc_shader_query_buf(cmd_buffer); + emit_begin_query(cmd_buffer, pool, va, pool->vk.query_type, flags, index); } diff --git a/src/amd/vulkan/radv_query.h b/src/amd/vulkan/radv_query.h index 84f4dcd600d..b428bc0abbf 100644 --- a/src/amd/vulkan/radv_query.h +++ b/src/amd/vulkan/radv_query.h @@ -26,6 +26,7 @@ struct radv_query_pool { char *ptr; bool uses_emulated_queries; bool uses_ace; /* For task shader invocations on GFX10.3+ */ + bool uses_shader_query_buf; /* For generated/written primitives queries on GFX12+; set at pool creation */ }; VK_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, vk.base, VkQueryPool, VK_OBJECT_TYPE_QUERY_POOL)