mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-06 23:10:39 +02:00
brw: fence SLM writes between workgroups
On LSC platforms the SLM writes are unfenced between workgroups. This means a workgroup W1 that is finishing might still have incomplete SLM writes. Another workgroup W2, dispatched after W1 and allocated an overlapping SLM location, might have writes that race with the previous W1 operations. The solution is to fence all write operations (stores & atomics) of a workgroup before ending the threads. We do this by emitting a single SLM fence either at the end of the shader or, if there is only a single block with unfenced writes, at the end of that block. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: mesa-stable Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13924 Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40430>
This commit is contained in:
parent
32ca98a26e
commit
fa523aedd0
4 changed files with 95 additions and 0 deletions
|
|
@ -2764,6 +2764,13 @@ brw_postprocess_nir_opts(brw_pass_tracker *pt,
|
|||
|
||||
brw_vectorize_lower_mem_access(pt, robust_flags);
|
||||
|
||||
/* Fence LSC SLM writes to avoid workgroups WaW hazards to the same SLM
|
||||
* location.
|
||||
*/
|
||||
if (devinfo->has_lsc &&
|
||||
mesa_shader_stage_uses_workgroup(nir->info.stage))
|
||||
OPT(brw_nir_fence_shared_stores);
|
||||
|
||||
/* Do this after lowering memory access bit-sizes */
|
||||
if (nir->info.stage == MESA_SHADER_MESH ||
|
||||
nir->info.stage == MESA_SHADER_TASK) {
|
||||
|
|
|
|||
|
|
@ -138,6 +138,9 @@ void brw_preprocess_nir(const struct brw_compiler *compiler,
|
|||
nir_shader *nir,
|
||||
const struct brw_nir_compiler_opts *opts);
|
||||
|
||||
bool
|
||||
brw_nir_fence_shared_stores(nir_shader *shader);
|
||||
|
||||
void
|
||||
brw_nir_link_shaders(const struct brw_compiler *compiler,
|
||||
nir_shader *producer, nir_shader *consumer);
|
||||
|
|
|
|||
84
src/intel/compiler/brw/brw_nir_fence_shared_stores.c
Normal file
84
src/intel/compiler/brw/brw_nir_fence_shared_stores.c
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
* Copyright © 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "brw_nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
static bool
|
||||
block_is_in_loop(nir_block *block)
|
||||
{
|
||||
nir_cf_node *cf_node = block->cf_node.parent;
|
||||
|
||||
while (cf_node != NULL) {
|
||||
if (cf_node->type == nir_cf_node_loop)
|
||||
return true;
|
||||
|
||||
cf_node = cf_node->parent;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_nir_fence_shared_stores(nir_shader *shader)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
nir_foreach_function_with_impl(function, impl, shader) {
|
||||
bool multiple_unfenced_write_blocks = false;
|
||||
nir_block *unfenced_write_block = NULL;
|
||||
nir_foreach_block(block, impl) {
|
||||
bool unfenced_writes = false;
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_store_shared:
|
||||
case nir_intrinsic_shared_atomic:
|
||||
case nir_intrinsic_shared_atomic_swap:
|
||||
case nir_intrinsic_store_shared_block_intel:
|
||||
unfenced_writes = true;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_barrier:
|
||||
if (nir_intrinsic_memory_modes(intrin) & nir_var_mem_shared)
|
||||
unfenced_writes = false;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (unfenced_writes) {
|
||||
/* Consider we have multiple blocks if the unfenced write is
|
||||
* within a loop.
|
||||
*/
|
||||
multiple_unfenced_write_blocks =
|
||||
unfenced_write_block != NULL ||
|
||||
block_is_in_loop(block);
|
||||
unfenced_write_block = block;
|
||||
}
|
||||
}
|
||||
|
||||
if (multiple_unfenced_write_blocks || unfenced_write_block) {
|
||||
nir_builder b = nir_builder_at(
|
||||
nir_after_block_before_jump(
|
||||
multiple_unfenced_write_blocks ?
|
||||
nir_impl_last_block(impl) :
|
||||
unfenced_write_block));
|
||||
nir_barrier(&b,
|
||||
.execution_scope=SCOPE_NONE,
|
||||
.memory_scope=SCOPE_WORKGROUP,
|
||||
.memory_semantics = NIR_MEMORY_RELEASE,
|
||||
.memory_modes = nir_var_mem_shared);
|
||||
progress |= nir_progress(true, impl, nir_metadata_control_flow);
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
|
@ -52,6 +52,7 @@ libintel_compiler_brw_files = files(
|
|||
'brw_lower_subgroup_ops.cpp',
|
||||
'brw_nir.h',
|
||||
'brw_nir.c',
|
||||
'brw_nir_fence_shared_stores.c',
|
||||
'brw_nir_lower_cooperative_matrix.c',
|
||||
'brw_nir_lower_cs_intrinsics.c',
|
||||
'brw_nir_lower_alpha_to_coverage.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue