mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-31 03:20:09 +01:00
radeonsi: remove and inline si_shader::ngg::prim_amp_factor
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26055>
This commit is contained in:
parent
e98a4f3820
commit
6708ccd3bf
3 changed files with 8 additions and 12 deletions
|
|
@@ -233,17 +233,9 @@ retry_select_mode:
|
|||
: max_esverts;
|
||||
assert(max_out_vertices <= 256);
|
||||
|
||||
unsigned prim_amp_factor = 1;
|
||||
if (gs_stage == MESA_SHADER_GEOMETRY) {
|
||||
/* Number of output primitives per GS input primitive after
|
||||
* GS instancing. */
|
||||
prim_amp_factor = gs_sel->info.base.gs.vertices_out;
|
||||
}
|
||||
|
||||
shader->ngg.hw_max_esverts = max_esverts;
|
||||
shader->ngg.max_gsprims = max_gsprims;
|
||||
shader->ngg.max_out_verts = max_out_vertices;
|
||||
shader->ngg.prim_amp_factor = prim_amp_factor;
|
||||
shader->ngg.max_vert_out_per_gs_instance = max_vert_out_per_gs_instance;
|
||||
|
||||
/* Don't count unusable vertices. */
|
||||
|
|
|
|||
|
|
@@ -931,7 +931,6 @@ struct si_shader {
|
|||
uint16_t hw_max_esverts;
|
||||
uint16_t max_gsprims;
|
||||
uint16_t max_out_verts;
|
||||
uint16_t prim_amp_factor;
|
||||
bool max_vert_out_per_gs_instance;
|
||||
/* Register values. */
|
||||
unsigned ge_max_output_per_subgroup;
|
||||
|
|
|
|||
|
|
@@ -1366,7 +1366,6 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
S_02870C_POS3_EXPORT_FORMAT(shader->info.nr_pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP
|
||||
: V_02870C_SPI_SHADER_NONE);
|
||||
shader->ngg.ge_max_output_per_subgroup = S_0287FC_MAX_VERTS_PER_SUBGROUP(shader->ngg.max_out_verts);
|
||||
shader->ngg.ge_ngg_subgrp_cntl = S_028B4C_PRIM_AMP_FACTOR(shader->ngg.prim_amp_factor);
|
||||
shader->ngg.vgt_gs_instance_cnt =
|
||||
S_028B90_ENABLE(gs_num_invocations > 1) |
|
||||
S_028B90_CNT(gs_num_invocations) |
|
||||
|
|
@@ -1376,9 +1375,11 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
if (gs_stage == MESA_SHADER_GEOMETRY) {
|
||||
shader->ngg.esgs_vertex_stride = es_sel->info.esgs_vertex_stride / 4;
|
||||
shader->ngg.vgt_gs_max_vert_out = gs_sel->info.base.gs.vertices_out;
|
||||
shader->ngg.ge_ngg_subgrp_cntl = S_028B4C_PRIM_AMP_FACTOR(gs_sel->info.base.gs.vertices_out);
|
||||
} else {
|
||||
shader->ngg.esgs_vertex_stride = 1;
|
||||
shader->ngg.vgt_gs_max_vert_out = 1;
|
||||
shader->ngg.ge_ngg_subgrp_cntl = S_028B4C_PRIM_AMP_FACTOR(1);
|
||||
}
|
||||
|
||||
if (es_stage == MESA_SHADER_TESS_EVAL)
|
||||
|
|
@@ -1432,12 +1433,16 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
}
|
||||
|
||||
if (sscreen->info.gfx_level >= GFX11) {
|
||||
/* This should be <= 252 for performance on Gfx11. 256 works too but is slower. */
|
||||
unsigned max_prim_grp_size = 252;
|
||||
unsigned prim_amp_factor = gs_stage == MESA_SHADER_GEOMETRY ?
|
||||
gs_sel->info.base.gs.vertices_out : 1;
|
||||
|
||||
shader->ge_cntl = S_03096C_PRIMS_PER_SUBGRP(shader->ngg.max_gsprims) |
|
||||
S_03096C_VERTS_PER_SUBGRP(shader->ngg.hw_max_esverts) |
|
||||
S_03096C_BREAK_PRIMGRP_AT_EOI(break_wave_at_eoi) |
|
||||
/* This should be <= 252 for performance. 256 works too but is slower. */
|
||||
S_03096C_PRIM_GRP_SIZE_GFX11(
|
||||
CLAMP(252 / MAX2(shader->ngg.prim_amp_factor, 1), 1, 256));
|
||||
CLAMP(max_prim_grp_size / MAX2(prim_amp_factor, 1), 1, 256));
|
||||
} else {
|
||||
shader->ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(shader->ngg.max_gsprims) |
|
||||
S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts) |
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue