diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index bde19321669..69f55df2c45 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -54,6 +54,7 @@ typedef struct bool early_prim_export; bool use_edgeflags; bool has_prim_query; + bool can_cull; unsigned wave_size; unsigned max_num_waves; unsigned num_vertices_per_primitives; @@ -420,6 +421,17 @@ emit_ngg_nogs_prim_exp_arg(nir_builder *b, lower_ngg_nogs_state *st) static void emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def *arg) { + bool need_prim_id_store_shared = + st->export_prim_id && b->shader->info.stage == MESA_SHADER_VERTEX; + + /* Add barrier if LDS is already used by culling and we need LDS for prim id here. */ + if (st->can_cull && need_prim_id_store_shared) { + nir_scoped_barrier(b, .execution_scope = NIR_SCOPE_WORKGROUP, + .memory_scope = NIR_SCOPE_WORKGROUP, + .memory_semantics = NIR_MEMORY_ACQ_REL, + .memory_modes = nir_var_mem_shared); + } + nir_ssa_def *gs_thread = st->gs_accepted_var ? nir_load_var(b, st->gs_accepted_var) : nir_has_input_primitive_amd(b); @@ -429,7 +441,7 @@ emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def if (!arg) arg = emit_ngg_nogs_prim_exp_arg(b, st); - if (st->export_prim_id && b->shader->info.stage == MESA_SHADER_VERTEX) { + if (need_prim_id_store_shared) { nir_ssa_def *prim_valid = nir_ieq_imm(b, nir_ushr_imm(b, arg, 31), 0); nir_if *if_prim_valid = nir_push_if(b, prim_valid); { @@ -1389,6 +1401,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, .early_prim_export = early_prim_export, .use_edgeflags = use_edgeflags, .has_prim_query = has_prim_query, + .can_cull = can_cull, .num_vertices_per_primitives = num_vertices_per_primitives, .provoking_vtx_idx = provoking_vtx_last ? (num_vertices_per_primitives - 1) : 0, .position_value_var = position_value_var,