radv: allocate more space for pipeline statistics query on GFX11

There are 14 queries, including new queries for mesh/task shaders.
Ported from RadeonSI.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19199>
This commit is contained in:
Samuel Pitoiset 2022-10-20 09:06:42 +00:00 committed by Marge Bot
parent 0237e5f379
commit 61e54297cd

View file

@ -39,9 +39,16 @@
/* All-ones 64-bit value; presumably the marker for a timestamp that has not
 * been written yet -- confirm at the use sites. */
#define TIMESTAMP_NOT_READY UINT64_MAX
/* 11 * 8 = 88: the same value radv_get_pipelinestat_query_size() returns when
 * gfx_level is below GFX11 (11 results, 8 per result). */
static const int pipelinestat_block_size = 11 * 8;
/* TODO: Add support for mesh/task queries on GFX11 */
/* 11-entry table holding a permutation of 0..10. NOTE(review): presumably maps
 * each pipeline statistic to its slot in the result block -- confirm against
 * the code that indexes this table. */
static const unsigned pipeline_statistics_indices[] = {7, 6, 3, 4, 5, 2, 1, 0, 8, 9, 10};
/**
 * Compute the size of one pipeline statistics result block.
 *
 * The block holds 14 results when the device's gfx_level is GFX11 or newer
 * and 11 results otherwise; every result contributes 8 to the returned size.
 *
 * @param device  Device whose physical_device->rad_info.gfx_level is read.
 * @return        Result count multiplied by 8.
 */
static unsigned
radv_get_pipelinestat_query_size(struct radv_device *device)
{
   unsigned result_count;

   if (device->physical_device->rad_info.gfx_level >= GFX11) {
      result_count = 14;
   } else {
      result_count = 11;
   }

   return result_count * 8;
}
static void
radv_store_availability(nir_builder *b, nir_ssa_def *flags, nir_ssa_def *dst_buf,
nir_ssa_def *offset, nir_ssa_def *value32)
@ -196,6 +203,8 @@ build_occlusion_query_shader(struct radv_device *device)
static nir_shader *
build_pipeline_statistics_query_shader(struct radv_device *device)
{
unsigned pipelinestat_block_size = +radv_get_pipelinestat_query_size(device);
/* the shader this builds is roughly
*
* push constants {
@ -1101,7 +1110,7 @@ radv_CreateQueryPool(VkDevice _device, const VkQueryPoolCreateInfo *pCreateInfo,
pool->stride = 16 * device->physical_device->rad_info.max_render_backends;
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
pool->stride = pipelinestat_block_size * 2;
pool->stride = radv_get_pipelinestat_query_size(device) * 2;
if (pool->uses_gds) {
/* When the query pool needs GDS (for counting the number of primitives generated by a
* geometry shader with NGG), allocate 2x64-bit values for begin/end.
@ -1264,6 +1273,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
break;
}
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device);
const uint32_t *avail_ptr =
(const uint32_t *)(pool->ptr + pool->availability_offset + 4 * query);
uint64_t ngg_gds_result = 0;
@ -1794,7 +1804,9 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(cmd_buffer->device);
radeon_check_space(cmd_buffer->device->ws, cs, 4);
++cmd_buffer->state.active_pipeline_queries;
@ -1819,6 +1831,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
cmd_buffer->state.active_pipeline_gds_queries++;
}
break;
}
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
emit_sample_streamout(cmd_buffer, va, index);
break;
@ -1884,7 +1897,9 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
radeon_emit(cs, (va + 8) >> 32);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(cmd_buffer->device);
radeon_check_space(cmd_buffer->device->ws, cs, 16);
cmd_buffer->state.active_pipeline_queries--;
@ -1912,6 +1927,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
cmd_buffer->state.active_pipeline_gds_queries--;
}
break;
}
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
emit_sample_streamout(cmd_buffer, va + 16, index);
break;