mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
radeonsi: fix the fast launch vert/prim thread counts if they are trimmed
This fixes the case where the vertex and primitive thread counts got out of sync because only one of them was decreased.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10813>
This commit is contained in:
parent
f6e19fd831
commit
c8e8979d6b
2 changed files with 28 additions and 6 deletions
|
|
@@ -2103,12 +2103,36 @@ retry_select_mode:
|
|||
prim_amp_factor = gs_sel->info.base.gs.vertices_out;
|
||||
}
|
||||
|
||||
/* Fix up the thread counts for fast launch. */
|
||||
if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST) {
|
||||
/* The vertex count must be a multiple of 3. */
|
||||
max_esverts -= max_esverts % 3;
|
||||
/* We can only decrease the size, not increase it. */
|
||||
if (max_gsprims * 3 < max_esverts) {
|
||||
max_esverts = max_gsprims * 3;
|
||||
} else {
|
||||
max_gsprims = max_esverts / 3;
|
||||
}
|
||||
} else if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP) {
|
||||
/* The primitive count must be even to get correct winding for triangle strips. */
|
||||
max_gsprims &= ~1;
|
||||
if (max_gsprims - 2 < max_esverts) {
|
||||
max_esverts = max_gsprims + 2;
|
||||
} else {
|
||||
max_gsprims = max_esverts - 2;
|
||||
max_gsprims &= ~1;
|
||||
max_esverts = max_gsprims + 2;
|
||||
}
|
||||
}
|
||||
|
||||
/* On gfx10, the GE only checks against the maximum number of ES verts after
|
||||
* allocating a full GS primitive. So we need to ensure that whenever
|
||||
* this check passes, there is enough space for a full primitive without
|
||||
* vertex reuse.
|
||||
*/
|
||||
if (gs_sel->screen->info.chip_class == GFX10)
|
||||
if (gs_sel->screen->info.chip_class == GFX10 &&
|
||||
!(shader->key.opt.ngg_culling & (SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST |
|
||||
SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP)))
|
||||
shader->ngg.hw_max_esverts = max_esverts - max_verts_per_prim + 1;
|
||||
else
|
||||
shader->ngg.hw_max_esverts = max_esverts;
|
||||
|
|
|
|||
|
|
@@ -1305,12 +1305,10 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
shader->ctx_reg.ngg.ge_pc_alloc = S_030980_OVERSUB_EN(sscreen->info.use_late_alloc) |
|
||||
S_030980_NUM_PC_LINES(oversub_pc_lines - 1);
|
||||
|
||||
if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST) {
|
||||
if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST ||
|
||||
shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP) {
|
||||
shader->ge_cntl = S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
|
||||
S_03096C_VERT_GRP_SIZE(shader->ngg.max_gsprims * 3);
|
||||
} else if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP) {
|
||||
shader->ge_cntl = S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
|
||||
S_03096C_VERT_GRP_SIZE(shader->ngg.max_gsprims + 2);
|
||||
S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts);
|
||||
} else {
|
||||
shader->ge_cntl = S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
|
||||
S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts) |
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue