ac/nir/ngg: add a barrier before prim id export

When culling enabled, it will use LDS space, which overlap with
the prim id export.

Fixes: e97f0463a8 ("ac/nir: Implement NGG deferred attribute culling in NIR.")
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17593>
This commit is contained in:
Qiang Yu 2022-06-28 16:18:21 +08:00 committed by Marge Bot
parent 0b7ef846b3
commit eeaf0b1888

View file

@ -54,6 +54,7 @@ typedef struct
bool early_prim_export;
bool use_edgeflags;
bool has_prim_query;
bool can_cull;
unsigned wave_size;
unsigned max_num_waves;
unsigned num_vertices_per_primitives;
@ -420,6 +421,17 @@ emit_ngg_nogs_prim_exp_arg(nir_builder *b, lower_ngg_nogs_state *st)
static void
emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def *arg)
{
bool need_prim_id_store_shared =
st->export_prim_id && b->shader->info.stage == MESA_SHADER_VERTEX;
/* Add barrier if LDS is already used by culling and we need LDS for prim id here. */
if (st->can_cull && need_prim_id_store_shared) {
nir_scoped_barrier(b, .execution_scope = NIR_SCOPE_WORKGROUP,
.memory_scope = NIR_SCOPE_WORKGROUP,
.memory_semantics = NIR_MEMORY_ACQ_REL,
.memory_modes = nir_var_mem_shared);
}
nir_ssa_def *gs_thread = st->gs_accepted_var
? nir_load_var(b, st->gs_accepted_var)
: nir_has_input_primitive_amd(b);
@ -429,7 +441,7 @@ emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def
if (!arg)
arg = emit_ngg_nogs_prim_exp_arg(b, st);
if (st->export_prim_id && b->shader->info.stage == MESA_SHADER_VERTEX) {
if (need_prim_id_store_shared) {
nir_ssa_def *prim_valid = nir_ieq_imm(b, nir_ushr_imm(b, arg, 31), 0);
nir_if *if_prim_valid = nir_push_if(b, prim_valid);
{
@ -1389,6 +1401,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader,
.early_prim_export = early_prim_export,
.use_edgeflags = use_edgeflags,
.has_prim_query = has_prim_query,
.can_cull = can_cull,
.num_vertices_per_primitives = num_vertices_per_primitives,
.provoking_vtx_idx = provoking_vtx_last ? (num_vertices_per_primitives - 1) : 0,
.position_value_var = position_value_var,