radv: allocate memory for the shader query buffer on GFX12

The allocation is done on-demand.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33041>
This commit is contained in:
Samuel Pitoiset 2025-01-15 05:52:01 -08:00 committed by Marge Bot
parent 15a69991fe
commit 2f86338ba3
4 changed files with 37 additions and 0 deletions

View file

@ -486,6 +486,8 @@ struct radv_cmd_state {
bool uses_vrs_coarse_shading;
bool uses_dynamic_patch_control_points;
bool uses_fbfetch_output;
uint64_t shader_query_buf_va; /* GFX12+ */
};
struct radv_enc_state {

View file

@ -151,6 +151,11 @@
#define RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET 60
#define RADV_SHADER_QUERY_TS_INVOCATION_OFFSET 64
/* Size of the shader query buffer for generated/written primitive queries
 * using SSBO atomics on GFX12.
 *
 * The size is the distance from the PRIM_GEN slot at index 0 to the PRIM_XFB
 * slot at index 3, plus 4 so that the last slot itself is covered by the
 * buffer (presumably every slot is a 4-byte counter -- confirm against the
 * RADV_SHADER_QUERY_*_OFFSET definitions).
 */
#define RADV_SHADER_QUERY_BUF_SIZE (RADV_SHADER_QUERY_PRIM_XFB_OFFSET(3) - RADV_SHADER_QUERY_PRIM_GEN_OFFSET(0) + 4)
/* Number of samples for line smooth lowering (hw requirement). */
#define RADV_NUM_SMOOTH_AA_SAMPLES 4

View file

@ -961,6 +961,28 @@ emit_sample_streamout(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t
radeon_emit(cs, va >> 32);
}
/**
 * Allocate the GFX12+ shader query buffer for this command buffer on demand.
 *
 * The buffer is carved out of the command buffer's upload BO, is
 * RADV_SHADER_QUERY_BUF_SIZE bytes large and is cleared to zero. Its GPU
 * address is stored in cmd_buffer->state.shader_query_buf_va; a non-zero
 * value there means the buffer already exists and the call is a no-op.
 *
 * If the upload allocation fails, the function returns without touching
 * shader_query_buf_va.
 */
static void
radv_alloc_shader_query_buf(struct radv_cmd_buffer *cmd_buffer)
{
   const struct radv_physical_device *pdev = radv_device_physical(radv_cmd_buffer_device(cmd_buffer));
   unsigned upload_offset;
   void *cpu_ptr;

   assert(pdev->info.gfx_level >= GFX12);

   /* Already allocated earlier for this command buffer. */
   if (cmd_buffer->state.shader_query_buf_va != 0)
      return;

   /* 64 is passed as the alignment argument of the upload allocator. */
   if (radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, RADV_SHADER_QUERY_BUF_SIZE, 64, &upload_offset, &cpu_ptr)) {
      /* Start from a zeroed buffer. */
      memset(cpu_ptr, 0, RADV_SHADER_QUERY_BUF_SIZE);

      /* GPU address = base VA of the upload BO + offset of our sub-allocation. */
      cmd_buffer->state.shader_query_buf_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + upload_offset;
   }
}
static void
radv_begin_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t index)
{
@ -1953,6 +1975,10 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
/* The number of task shader invocations needs to be queried on ACE. */
pool->uses_ace = (pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
pool->uses_shader_query_buf =
pdev->info.gfx_level >= GFX12 && (pool->vk.query_type == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT ||
pool->vk.query_type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT);
switch (pCreateInfo->queryType) {
case VK_QUERY_TYPE_OCCLUSION:
pool->stride = 16 * pdev->info.max_render_backends;
@ -2701,6 +2727,9 @@ radv_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
radv_cs_add_buffer(device->ws, cmd_buffer->gang.cs, pool->bo);
}
if (pool->uses_shader_query_buf)
radv_alloc_shader_query_buf(cmd_buffer);
emit_begin_query(cmd_buffer, pool, va, pool->vk.query_type, flags, index);
}

View file

@ -26,6 +26,7 @@ struct radv_query_pool {
char *ptr;
bool uses_emulated_queries;
bool uses_ace; /* For task shader invocations on GFX10.3+ */
bool uses_shader_query_buf; /* For generated/written primitives on GFX12+ */
};
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, vk.base, VkQueryPool, VK_OBJECT_TYPE_QUERY_POOL)