From 6708ccd3bf4ba1668c19bdd2acae033ffd649465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 23 Oct 2023 22:18:10 -0400 Subject: [PATCH] radeonsi: remove and inline si_shader::ngg::prim_amp_factor Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 8 -------- src/gallium/drivers/radeonsi/si_shader.h | 1 - src/gallium/drivers/radeonsi/si_state_shaders.cpp | 11 ++++++++--- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 3c431cf0bbc..e008c356fe7 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -233,17 +233,9 @@ retry_select_mode: : max_esverts; assert(max_out_vertices <= 256); - unsigned prim_amp_factor = 1; - if (gs_stage == MESA_SHADER_GEOMETRY) { - /* Number of output primitives per GS input primitive after - * GS instancing. */ - prim_amp_factor = gs_sel->info.base.gs.vertices_out; - } - shader->ngg.hw_max_esverts = max_esverts; shader->ngg.max_gsprims = max_gsprims; shader->ngg.max_out_verts = max_out_vertices; - shader->ngg.prim_amp_factor = prim_amp_factor; shader->ngg.max_vert_out_per_gs_instance = max_vert_out_per_gs_instance; /* Don't count unusable vertices. */ diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index a4bfbc26444..5d5816f619b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -931,7 +931,6 @@ struct si_shader { uint16_t hw_max_esverts; uint16_t max_gsprims; uint16_t max_out_verts; - uint16_t prim_amp_factor; bool max_vert_out_per_gs_instance; /* Register values. */ unsigned ge_max_output_per_subgroup; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 000de082632..eb2c136e2a8 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -1366,7 +1366,6 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader S_02870C_POS3_EXPORT_FORMAT(shader->info.nr_pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE); shader->ngg.ge_max_output_per_subgroup = S_0287FC_MAX_VERTS_PER_SUBGROUP(shader->ngg.max_out_verts); - shader->ngg.ge_ngg_subgrp_cntl = S_028B4C_PRIM_AMP_FACTOR(shader->ngg.prim_amp_factor); shader->ngg.vgt_gs_instance_cnt = S_028B90_ENABLE(gs_num_invocations > 1) | S_028B90_CNT(gs_num_invocations) | @@ -1376,9 +1375,11 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader if (gs_stage == MESA_SHADER_GEOMETRY) { shader->ngg.esgs_vertex_stride = es_sel->info.esgs_vertex_stride / 4; shader->ngg.vgt_gs_max_vert_out = gs_sel->info.base.gs.vertices_out; + shader->ngg.ge_ngg_subgrp_cntl = S_028B4C_PRIM_AMP_FACTOR(gs_sel->info.base.gs.vertices_out); } else { shader->ngg.esgs_vertex_stride = 1; shader->ngg.vgt_gs_max_vert_out = 1; + shader->ngg.ge_ngg_subgrp_cntl = S_028B4C_PRIM_AMP_FACTOR(1); } if (es_stage == MESA_SHADER_TESS_EVAL) @@ -1432,12 +1433,16 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader } if (sscreen->info.gfx_level >= GFX11) { + /* This should be <= 252 for performance on Gfx11. 256 works too but is slower. */ + unsigned max_prim_grp_size = 252; + unsigned prim_amp_factor = gs_stage == MESA_SHADER_GEOMETRY ? + gs_sel->info.base.gs.vertices_out : 1; + shader->ge_cntl = S_03096C_PRIMS_PER_SUBGRP(shader->ngg.max_gsprims) | S_03096C_VERTS_PER_SUBGRP(shader->ngg.hw_max_esverts) | S_03096C_BREAK_PRIMGRP_AT_EOI(break_wave_at_eoi) | - /* This should be <= 252 for performance. 256 works too but is slower. */ S_03096C_PRIM_GRP_SIZE_GFX11( - CLAMP(252 / MAX2(shader->ngg.prim_amp_factor, 1), 1, 256)); + CLAMP(max_prim_grp_size / MAX2(prim_amp_factor, 1), 1, 256)); } else { shader->ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(shader->ngg.max_gsprims) | S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts) |