mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-07 00:00:12 +01:00
radeonsi: remove a twice duplicated workaround for VERT_GRP_SIZE
This enables better lane occupancy.

Acked-by: Timur Kristóf <timur.kristof@gmail.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10813>
This commit is contained in:
parent
c8e8979d6b
commit
a0fcd37731
2 changed files with 13 additions and 30 deletions
|
|
@ -1964,16 +1964,6 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
|
|||
max_esverts_base = 128;
|
||||
}
|
||||
|
||||
/* Hardware has the following non-natural restrictions on the value
|
||||
* of GE_CNTL.VERT_GRP_SIZE based on the primitive type of
|
||||
* the draw:
|
||||
* - at most 252 for any line input primitive type
|
||||
* - at most 251 for any quad input primitive type
|
||||
* - at most 251 for triangle strips with adjacency (this happens to
|
||||
* be the natural limit for triangle *lists* with adjacency)
|
||||
*/
|
||||
max_esverts_base = MIN2(max_esverts_base, 251 + max_verts_per_prim - 1);
|
||||
|
||||
if (gs_stage == MESA_SHADER_GEOMETRY) {
|
||||
bool force_multi_cycling = false;
|
||||
unsigned max_out_verts_per_gsprim = gs_sel->info.base.gs.vertices_out * gs_num_invocations;
|
||||
|
|
@ -2125,18 +2115,7 @@ retry_select_mode:
|
|||
}
|
||||
}
|
||||
|
||||
/* On gfx10, the GE only checks against the maximum number of ES verts after
|
||||
* allocating a full GS primitive. So we need to ensure that whenever
|
||||
* this check passes, there is enough space for a full primitive without
|
||||
* vertex reuse.
|
||||
*/
|
||||
if (gs_sel->screen->info.chip_class == GFX10 &&
|
||||
!(shader->key.opt.ngg_culling & (SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST |
|
||||
SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP)))
|
||||
shader->ngg.hw_max_esverts = max_esverts - max_verts_per_prim + 1;
|
||||
else
|
||||
shader->ngg.hw_max_esverts = max_esverts;
|
||||
|
||||
shader->ngg.hw_max_esverts = max_esverts;
|
||||
shader->ngg.max_gsprims = max_gsprims;
|
||||
shader->ngg.max_out_verts = max_out_vertices;
|
||||
shader->ngg.prim_amp_factor = prim_amp_factor;
|
||||
|
|
|
|||
|
|
@ -1314,19 +1314,23 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts) |
|
||||
S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi);
|
||||
|
||||
/* Bug workaround for a possible hang with non-tessellation cases.
|
||||
* Tessellation always sets GE_CNTL.VERT_GRP_SIZE = 0
|
||||
/* On gfx10, the GE only checks against the maximum number of ES verts after
|
||||
* allocating a full GS primitive. So we need to ensure that whenever
|
||||
* this check passes, there is enough space for a full primitive without
|
||||
* vertex reuse. VERT_GRP_SIZE=256 doesn't need this. We should always get 256
|
||||
* if we have enough LDS.
|
||||
*
|
||||
* Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5
|
||||
* Tessellation is unaffected because it always sets GE_CNTL.VERT_GRP_SIZE = 0.
|
||||
*/
|
||||
if ((sscreen->info.chip_class == GFX10) &&
|
||||
(es_stage == MESA_SHADER_VERTEX || gs_stage == MESA_SHADER_VERTEX) && /* = no tess */
|
||||
shader->ngg.hw_max_esverts != 256) {
|
||||
shader->ngg.hw_max_esverts != 256 &&
|
||||
shader->ngg.hw_max_esverts > 5) {
|
||||
/* This could be based on the input primitive type. 5 is the worst case
|
||||
* for primitive types with adjacency.
|
||||
*/
|
||||
shader->ge_cntl &= C_03096C_VERT_GRP_SIZE;
|
||||
|
||||
if (shader->ngg.hw_max_esverts > 5) {
|
||||
shader->ge_cntl |= S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts - 5);
|
||||
}
|
||||
shader->ge_cntl |= S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts - 5);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue