radeonsi: remove and inline si_shader::ngg::prim_amp_factor

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26055>
This commit is contained in:
Marek Olšák 2023-10-23 22:18:10 -04:00
parent e98a4f3820
commit 6708ccd3bf
3 changed files with 8 additions and 12 deletions

View file

@@ -233,17 +233,9 @@ retry_select_mode:
: max_esverts;
assert(max_out_vertices <= 256);
unsigned prim_amp_factor = 1;
if (gs_stage == MESA_SHADER_GEOMETRY) {
/* Number of output primitives per GS input primitive after
* GS instancing. */
prim_amp_factor = gs_sel->info.base.gs.vertices_out;
}
shader->ngg.hw_max_esverts = max_esverts;
shader->ngg.max_gsprims = max_gsprims;
shader->ngg.max_out_verts = max_out_vertices;
shader->ngg.prim_amp_factor = prim_amp_factor;
shader->ngg.max_vert_out_per_gs_instance = max_vert_out_per_gs_instance;
/* Don't count unusable vertices. */

View file

@@ -931,7 +931,6 @@ struct si_shader {
uint16_t hw_max_esverts;
uint16_t max_gsprims;
uint16_t max_out_verts;
uint16_t prim_amp_factor;
bool max_vert_out_per_gs_instance;
/* Register values. */
unsigned ge_max_output_per_subgroup;

View file

@@ -1366,7 +1366,6 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
S_02870C_POS3_EXPORT_FORMAT(shader->info.nr_pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP
: V_02870C_SPI_SHADER_NONE);
shader->ngg.ge_max_output_per_subgroup = S_0287FC_MAX_VERTS_PER_SUBGROUP(shader->ngg.max_out_verts);
shader->ngg.ge_ngg_subgrp_cntl = S_028B4C_PRIM_AMP_FACTOR(shader->ngg.prim_amp_factor);
shader->ngg.vgt_gs_instance_cnt =
S_028B90_ENABLE(gs_num_invocations > 1) |
S_028B90_CNT(gs_num_invocations) |
@@ -1376,9 +1375,11 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
if (gs_stage == MESA_SHADER_GEOMETRY) {
shader->ngg.esgs_vertex_stride = es_sel->info.esgs_vertex_stride / 4;
shader->ngg.vgt_gs_max_vert_out = gs_sel->info.base.gs.vertices_out;
shader->ngg.ge_ngg_subgrp_cntl = S_028B4C_PRIM_AMP_FACTOR(gs_sel->info.base.gs.vertices_out);
} else {
shader->ngg.esgs_vertex_stride = 1;
shader->ngg.vgt_gs_max_vert_out = 1;
shader->ngg.ge_ngg_subgrp_cntl = S_028B4C_PRIM_AMP_FACTOR(1);
}
if (es_stage == MESA_SHADER_TESS_EVAL)
@@ -1432,12 +1433,16 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
}
if (sscreen->info.gfx_level >= GFX11) {
/* This should be <= 252 for performance on Gfx11. 256 works too but is slower. */
unsigned max_prim_grp_size = 252;
unsigned prim_amp_factor = gs_stage == MESA_SHADER_GEOMETRY ?
gs_sel->info.base.gs.vertices_out : 1;
shader->ge_cntl = S_03096C_PRIMS_PER_SUBGRP(shader->ngg.max_gsprims) |
S_03096C_VERTS_PER_SUBGRP(shader->ngg.hw_max_esverts) |
S_03096C_BREAK_PRIMGRP_AT_EOI(break_wave_at_eoi) |
/* This should be <= 252 for performance. 256 works too but is slower. */
S_03096C_PRIM_GRP_SIZE_GFX11(
CLAMP(252 / MAX2(shader->ngg.prim_amp_factor, 1), 1, 256));
CLAMP(max_prim_grp_size / MAX2(prim_amp_factor, 1), 1, 256));
} else {
shader->ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(shader->ngg.max_gsprims) |
S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts) |