diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c index 27636d41ac3..f96f72ea339 100644 --- a/src/amd/common/ac_nir.c +++ b/src/amd/common/ac_nir.c @@ -898,7 +898,8 @@ ac_nir_accum_ior(nir_builder *b, nir_def *accum_result, nir_def *new_term) bool ac_nir_gs_shader_query(nir_builder *b, bool has_gen_prim_query, - bool has_pipeline_stats_query, + bool has_gs_invocations_query, + bool has_gs_primitives_query, unsigned num_vertices_per_primitive, unsigned wave_size, nir_def *vertex_count[4], @@ -913,7 +914,7 @@ ac_nir_gs_shader_query(nir_builder *b, any_query_enabled = ac_nir_accum_ior(b, any_query_enabled, prim_gen_query_enabled); } - if (has_pipeline_stats_query) { + if (has_gs_invocations_query || has_gs_primitives_query) { pipeline_query_enabled = nir_load_pipeline_stat_query_enabled_amd(b); any_query_enabled = ac_nir_accum_ior(b, any_query_enabled, pipeline_query_enabled); } @@ -959,7 +960,7 @@ ac_nir_gs_shader_query(nir_builder *b, /* Store the query result to query result using an atomic add. */ nir_if *if_first_lane = nir_push_if(b, nir_elect(b, 1)); { - if (has_pipeline_stats_query) { + if (has_gs_invocations_query || has_gs_primitives_query) { nir_if *if_pipeline_query = nir_push_if(b, pipeline_query_enabled); { nir_def *count = NULL; @@ -974,10 +975,11 @@ ac_nir_gs_shader_query(nir_builder *b, } } - if (count) + if (has_gs_primitives_query && count) nir_atomic_add_gs_emit_prim_count_amd(b, count); - nir_atomic_add_shader_invocation_count_amd(b, num_active_threads); + if (has_gs_invocations_query) + nir_atomic_add_shader_invocation_count_amd(b, num_active_threads); } nir_pop_if(b, if_pipeline_query); } @@ -1237,6 +1239,7 @@ ac_nir_lower_legacy_gs(nir_shader *nir, bool progress = ac_nir_gs_shader_query(b, has_gen_prim_query, has_pipeline_stats_query, + has_pipeline_stats_query, num_vertices_per_primitive, 64, s.vertex_count, diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h index 8699d30529d..011fd544ec6 100644 --- a/src/amd/common/ac_nir.h +++ b/src/amd/common/ac_nir.h @@ -166,6 +166,8 @@ typedef struct { bool disable_streamout; bool has_gen_prim_query; bool has_xfb_prim_query; + bool has_gs_invocations_query; + bool has_gs_primitives_query; bool kill_pointsize; bool kill_layer; bool force_vrs; @@ -268,7 +270,8 @@ ac_nir_lower_legacy_vs(nir_shader *nir, bool ac_nir_gs_shader_query(nir_builder *b, bool has_gen_prim_query, - bool has_pipeline_stats_query, + bool has_gs_invocations_query, + bool has_gs_primitives_query, unsigned num_vertices_per_primitive, unsigned wave_size, nir_def *vertex_count[4], diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index 74646f1c2bd..8e0e316009f 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -3573,7 +3573,8 @@ ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options) b->cursor = nir_after_cf_list(&if_gs_thread->then_list); ac_nir_gs_shader_query(b, state.options->has_gen_prim_query, - state.options->gfx_level < GFX11, + state.options->has_gs_invocations_query, + state.options->has_gs_primitives_query, state.num_vertices_per_primitive, state.options->wave_size, state.vertex_count, diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index db32f0a4224..821dde34bfa 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -856,6 +856,8 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage, options.disable_streamout = !device->physical_device->use_ngg_streamout; options.has_gen_prim_query = info->has_prim_query; options.has_xfb_prim_query = info->has_xfb_query; + options.has_gs_invocations_query = device->physical_device->rad_info.gfx_level < GFX11; + options.has_gs_primitives_query = device->physical_device->rad_info.gfx_level < GFX11; options.force_vrs = info->force_vrs_per_vertex; if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) { diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index 8a99e9682b9..cf1f846842f 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -470,12 +470,17 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s } case nir_intrinsic_atomic_add_gs_emit_prim_count_amd: case nir_intrinsic_atomic_add_shader_invocation_count_amd: { - nir_def *buf = - si_nir_load_internal_binding(b, args, SI_GS_QUERY_EMULATED_COUNTERS_BUF, 4); - enum pipe_statistics_query_index index = intrin->intrinsic == nir_intrinsic_atomic_add_gs_emit_prim_count_amd ? PIPE_STAT_QUERY_GS_PRIMITIVES : PIPE_STAT_QUERY_GS_INVOCATIONS; + + /* GFX11 only needs to emulate PIPE_STAT_QUERY_GS_PRIMITIVES because GS culls, + * which makes the pipeline statistic incorrect. + */ + assert(sel->screen->info.gfx_level < GFX11 || index == PIPE_STAT_QUERY_GS_PRIMITIVES); + + nir_def *buf = + si_nir_load_internal_binding(b, args, SI_GS_QUERY_EMULATED_COUNTERS_BUF, 4); unsigned offset = si_query_pipestat_end_dw_offset(sel->screen, index) * 4; nir_def *count = intrin->src[0].ssa; diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index 93736254ca9..55e768d7c15 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -693,6 +693,12 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen, unsigned if ((index == PIPE_STAT_QUERY_GS_PRIMITIVES || index == PIPE_STAT_QUERY_GS_INVOCATIONS) && sscreen->use_ngg && (sscreen->info.gfx_level >= GFX10 && sscreen->info.gfx_level <= GFX10_3)) query->flags |= SI_QUERY_EMULATE_GS_COUNTERS; + + /* GFX11 only emulates PIPE_STAT_QUERY_GS_PRIMITIVES because the shader culls, + * which makes the statistic incorrect. + */ + if (sscreen->info.gfx_level >= GFX11 && index == PIPE_STAT_QUERY_GS_PRIMITIVES) + query->flags |= SI_QUERY_EMULATE_GS_COUNTERS; break; default: assert(0); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 0645de43256..4fa430fdbbf 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1976,6 +1976,8 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir) options.gs_out_vtx_bytes = sel->info.gsvs_vertex_size; options.has_gen_prim_query = options.has_xfb_prim_query = sel->screen->info.gfx_level >= GFX11; + options.has_gs_invocations_query = sel->screen->info.gfx_level < GFX11; + options.has_gs_primitives_query = true; /* For monolithic ES/GS to add vscnt wait when GS export pos0. */ if (key->ge.part.gs.es)