mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 09:28:07 +02:00
radeonsi: fix applying the NGG minimum vertex count requirement
The code applied the restriction too late, which could overflow LDS size, which started happening more often after the minimum vertex count was increased for Sienna. Incorporate the clamping into the previous code for rounding up the counts. Now the LDS size can never overflow, but it may use vector lanes less efficiently (max_gsprims can be decreased more), which will be addressed in the next commit. Fixes: 4ecc39e1aa ("radeonsi/gfx10: NGG geometry shader PM4 and upload") Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6137> (cherry picked from commit 64c741ffb7)
This commit is contained in:
parent
a4dfa2bdf8
commit
cad3caba1a
2 changed files with 9 additions and 4 deletions
|
|
@ -3739,7 +3739,7 @@
|
|||
"description": "radeonsi: fix applying the NGG minimum vertex count requirement",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"master_sha": null,
|
||||
"because_sha": "4ecc39e1aa1568f19ebf54a99ffe14643bac7d15"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -2003,6 +2003,8 @@ void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
|
|||
max_esverts =
|
||||
MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size);
|
||||
max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
|
||||
/* Hardware restriction: minimum value of max_esverts */
|
||||
max_esverts = MAX2(max_esverts, 23 + max_verts_per_prim);
|
||||
|
||||
max_gsprims = align(max_gsprims, wavesize);
|
||||
max_gsprims = MIN2(max_gsprims, max_gsprims_base);
|
||||
|
|
@ -2012,10 +2014,13 @@ void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
|
|||
clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, use_adjacency);
|
||||
assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
|
||||
} while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims);
|
||||
}
|
||||
|
||||
/* Hardware restriction: minimum value of max_esverts */
|
||||
max_esverts = MAX2(max_esverts, 23 + max_verts_per_prim);
|
||||
/* Verify the restriction. */
|
||||
assert(max_esverts >= 23 + max_verts_per_prim);
|
||||
} else {
|
||||
/* Hardware restriction: minimum value of max_esverts */
|
||||
max_esverts = MAX2(max_esverts, 23 + max_verts_per_prim);
|
||||
}
|
||||
|
||||
unsigned max_out_vertices =
|
||||
max_vert_out_per_gs_instance
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue