brw: fence SLM writes between workgroups
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

On LSC platforms the SLM writes are unfenced between workgroups. This
means a workgroup W1 finishing might have uncompleted SLM writes.
Another workgroup W2 dispatched after W1 which gets allocated an
overlapping SLM location might have writes that race with the previous
W1 operations.

The solution to this is to fence all write operations (store & atomics)
of a workgroup before ending its threads. We do this by emitting a
single SLM fence either at the end of the shader or, if there is only a
single unfenced write, at the end of that block.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13924
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40430>
This commit is contained in:
Lionel Landwerlin 2026-03-15 21:26:33 +02:00 committed by Marge Bot
parent 32ca98a26e
commit fa523aedd0
4 changed files with 95 additions and 0 deletions

View file

@ -2764,6 +2764,13 @@ brw_postprocess_nir_opts(brw_pass_tracker *pt,
brw_vectorize_lower_mem_access(pt, robust_flags);
/* Fence LSC SLM writes to avoid workgroups WaW hazards to the same SLM
* location.
*/
if (devinfo->has_lsc &&
mesa_shader_stage_uses_workgroup(nir->info.stage))
OPT(brw_nir_fence_shared_stores);
/* Do this after lowering memory access bit-sizes */
if (nir->info.stage == MESA_SHADER_MESH ||
nir->info.stage == MESA_SHADER_TASK) {

View file

@ -138,6 +138,9 @@ void brw_preprocess_nir(const struct brw_compiler *compiler,
nir_shader *nir,
const struct brw_nir_compiler_opts *opts);
bool
brw_nir_fence_shared_stores(nir_shader *shader);
void
brw_nir_link_shaders(const struct brw_compiler *compiler,
nir_shader *producer, nir_shader *consumer);

View file

@ -0,0 +1,84 @@
/*
* Copyright © 2026 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "brw_nir.h"
#include "compiler/nir/nir_builder.h"
/* Returns true when @block is nested, at any depth, inside a NIR loop
 * control-flow node. Used to decide whether a single unfenced-write block
 * can actually execute multiple times.
 */
static bool
block_is_in_loop(nir_block *block)
{
   for (nir_cf_node *node = block->cf_node.parent;
        node != NULL;
        node = node->parent) {
      if (node->type == nir_cf_node_loop)
         return true;
   }

   return false;
}
/* Emit a release fence for SLM (shared local memory) writes before the
 * shader ends.
 *
 * On LSC platforms, SLM writes are not ordered across workgroups: a
 * workgroup can finish with stores still in flight, racing with a later
 * workgroup that gets an overlapping SLM allocation (WaW hazard). This
 * pass scans each function impl for shared-memory writes that are not
 * followed by a shared-memory barrier, and emits a single workgroup-scope
 * release fence — either at the end of the one block containing the
 * unfenced writes, or at the end of the shader when several blocks (or a
 * block inside a loop) are involved.
 *
 * Returns true when any fence was inserted (NIR progress).
 */
bool
brw_nir_fence_shared_stores(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function_with_impl(function, impl, shader) {
      /* Set when more than one block ends with unfenced writes, or when a
       * single such block may execute repeatedly (it is inside a loop).
       */
      bool multiple_unfenced_write_blocks = false;
      /* Last block seen that ends with unfenced shared-memory writes. */
      nir_block *unfenced_write_block = NULL;
      nir_foreach_block(block, impl) {
         /* Tracks, in program order within this block, whether a
          * shared-memory write has happened with no later shared-memory
          * barrier covering it.
          */
         bool unfenced_writes = false;
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_store_shared:
            case nir_intrinsic_shared_atomic:
            case nir_intrinsic_shared_atomic_swap:
            case nir_intrinsic_store_shared_block_intel:
               /* All SLM write forms: plain stores, atomics and Intel
                * block stores.
                */
               unfenced_writes = true;
               break;
            case nir_intrinsic_barrier:
               /* A barrier covering shared memory fences every write
                * above it in this block.
                */
               if (nir_intrinsic_memory_modes(intrin) & nir_var_mem_shared)
                  unfenced_writes = false;
               break;
            default:
               break;
            }
         }
         if (unfenced_writes) {
            /* Consider we have multiple blocks if the unfenced write is
             * within a loop.
             */
            multiple_unfenced_write_blocks =
               unfenced_write_block != NULL ||
               block_is_in_loop(block);
            unfenced_write_block = block;
         }
      }
      if (multiple_unfenced_write_blocks || unfenced_write_block) {
         /* Single unfenced block: fence right where the writes are, so
          * the fence is not on paths that did no SLM write. Multiple
          * blocks (or a loop): fall back to one fence at the very end of
          * the shader.
          */
         nir_builder b = nir_builder_at(
            nir_after_block_before_jump(
               multiple_unfenced_write_blocks ?
               nir_impl_last_block(impl) :
               unfenced_write_block));
         /* Release-only, workgroup-scope fence: no execution barrier is
          * needed, only ordering of the SLM stores before thread end.
          */
         nir_barrier(&b,
                     .execution_scope=SCOPE_NONE,
                     .memory_scope=SCOPE_WORKGROUP,
                     .memory_semantics = NIR_MEMORY_RELEASE,
                     .memory_modes = nir_var_mem_shared);
         progress |= nir_progress(true, impl, nir_metadata_control_flow);
      }
   }
   return progress;
}

View file

@ -52,6 +52,7 @@ libintel_compiler_brw_files = files(
'brw_lower_subgroup_ops.cpp',
'brw_nir.h',
'brw_nir.c',
'brw_nir_fence_shared_stores.c',
'brw_nir_lower_cooperative_matrix.c',
'brw_nir_lower_cs_intrinsics.c',
'brw_nir_lower_alpha_to_coverage.c',