From e84168bdac99d94e01549538c07371b0037a1127 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sun, 15 Mar 2026 21:26:33 +0200 Subject: [PATCH] brw: fence SLM writes between workgroups On LSC platforms the SLM writes are unfenced between workgroups. This means a workgroup W1 finishing might have uncompleted SLM writes. Another workgroup W2 dispatched after W1 which gets allocated an overlapping SLM location might have writes that race with the previous W1 operations. The solution to this is fence all write operations (store & atomics) of a workgroup before ending the threads. We do this by emitting a single SLM fence either at the end of the shader or if there is only a single unfenced right, at the end of that block. Signed-off-by: Lionel Landwerlin Cc: mesa-stable Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13924 Reviewed-by: Alyssa Rosenzweig (cherry picked from commit fa523aedd0ab96cb4262f3d63e2d8fd75a112efb) Part-of: --- .pick_status.json | 2 +- src/intel/compiler/brw/brw_nir.c | 7 ++ src/intel/compiler/brw/brw_nir.h | 3 + .../brw/brw_nir_fence_shared_stores.c | 84 +++++++++++++++++++ src/intel/compiler/brw/meson.build | 1 + 5 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 src/intel/compiler/brw/brw_nir_fence_shared_stores.c diff --git a/.pick_status.json b/.pick_status.json index 5f3c3c8eae7..a64b1061f92 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1744,7 +1744,7 @@ "description": "brw: fence SLM writes between workgroups", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c index ecf587a69e2..3b709968556 100644 --- a/src/intel/compiler/brw/brw_nir.c +++ b/src/intel/compiler/brw/brw_nir.c @@ -2625,6 +2625,13 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, brw_vectorize_lower_mem_access(nir, compiler, robust_flags); 
+ /* Fence LSC SLM writes to avoid WaW hazards between workgroups on the + * same SLM location.
nir_intrinsic_store_shared: + case nir_intrinsic_shared_atomic: + case nir_intrinsic_shared_atomic_swap: + case nir_intrinsic_store_shared_block_intel: + unfenced_writes = true; + break; + + case nir_intrinsic_barrier: + if (nir_intrinsic_memory_modes(intrin) & nir_var_mem_shared) + unfenced_writes = false; + break; + + default: + break; + } + } + + if (unfenced_writes) { + /* Consider we have multiple blocks if the unfenced write is + * within a loop. + */ + multiple_unfenced_write_blocks = + unfenced_write_block != NULL || + block_is_in_loop(block); + unfenced_write_block = block; + } + } + + if (multiple_unfenced_write_blocks || unfenced_write_block) { + nir_builder b = nir_builder_at( + nir_after_block_before_jump( + multiple_unfenced_write_blocks ? + nir_impl_last_block(impl) : + unfenced_write_block)); + nir_barrier(&b, + .execution_scope=SCOPE_NONE, + .memory_scope=SCOPE_WORKGROUP, + .memory_semantics = NIR_MEMORY_RELEASE, + .memory_modes = nir_var_mem_shared); + progress |= nir_progress(true, impl, nir_metadata_control_flow); + } + } + + return progress; +} diff --git a/src/intel/compiler/brw/meson.build b/src/intel/compiler/brw/meson.build index ccef45181c4..840eb3cee0d 100644 --- a/src/intel/compiler/brw/meson.build +++ b/src/intel/compiler/brw/meson.build @@ -54,6 +54,7 @@ libintel_compiler_brw_files = files( 'brw_nir.h', 'brw_nir.c', 'brw_nir_analyze_ubo_ranges.c', + 'brw_nir_fence_shared_stores.c', 'brw_nir_lower_cooperative_matrix.c', 'brw_nir_lower_cs_intrinsics.c', 'brw_nir_lower_alpha_to_coverage.c',