From 0e3bc3d8c02a02f29a83b38accb5f5c9eecf6ca1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Thu, 2 Oct 2025 15:27:58 +0200 Subject: [PATCH] nir/opt_offsets: call allow_offset_wrap() for try_fold_shared2() This prevents applying wrapping offsets on GFX6. Fixes: e1a692f74b943bf708c70f1022c98c877d64edd8 ('nir/opt_offsets: allow for unsigned wraps when folding load/store_shared2_amd offsets') Part-of: --- src/amd/common/nir/ac_nir.c | 2 ++ src/compiler/nir/nir_opt_offsets.c | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/amd/common/nir/ac_nir.c b/src/amd/common/nir/ac_nir.c index f3e0c32366a..2b2a461f63e 100644 --- a/src/amd/common/nir/ac_nir.c +++ b/src/amd/common/nir/ac_nir.c @@ -904,6 +904,8 @@ ac_nir_allow_offset_wrap_cb(nir_intrinsic_instr *instr, const void *data) case nir_intrinsic_store_shared: case nir_intrinsic_shared_atomic: case nir_intrinsic_shared_atomic_swap: + case nir_intrinsic_load_shared2_amd: + case nir_intrinsic_store_shared2_amd: /* GFX6 uses a 16-bit adder and can't handle unsigned wrap. */ return gfx_level >= GFX7; default: return false; diff --git a/src/compiler/nir/nir_opt_offsets.c b/src/compiler/nir/nir_opt_offsets.c index 8965214b45a..09f9139e908 100644 --- a/src/compiler/nir/nir_opt_offsets.c +++ b/src/compiler/nir/nir_opt_offsets.c @@ -175,7 +175,8 @@ static bool try_fold_shared2(nir_builder *b, nir_intrinsic_instr *intrin, opt_offsets_state *state, - unsigned offset_src_idx) + unsigned offset_src_idx, + bool need_nuw) { bool is_load = intrin->intrinsic == nir_intrinsic_load_shared2_amd; unsigned comp_size = (is_load ? intrin->def.bit_size : intrin->src[0].ssa->bit_size) / 8; @@ -190,7 +191,7 @@ try_fold_shared2(nir_builder *b, if (!nir_src_is_const(*off_src)) { uint32_t max = INT32_MAX - MAX2(offset0, offset1); /* Avoid negative offsets. */ replace_src = nir_get_scalar(off_src->ssa, 0); - if (!try_extract_const_addition(b, state, &replace_src, &const_offset, max, false)) + if (!try_extract_const_addition(b, state, &replace_src, &const_offset, max, need_nuw)) return false; modified_shader = true; @@ -271,9 +272,9 @@ process_instr(nir_builder *b, nir_instr *instr, void *s) case nir_intrinsic_store_shared_ir3: return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->shared_max), need_nuw); case nir_intrinsic_load_shared2_amd: - return try_fold_shared2(b, intrin, state, 0); + return try_fold_shared2(b, intrin, state, 0, need_nuw); case nir_intrinsic_store_shared2_amd: - return try_fold_shared2(b, intrin, state, 1); + return try_fold_shared2(b, intrin, state, 1, need_nuw); case nir_intrinsic_load_buffer_amd: need_nuw &= !!(nir_intrinsic_access(intrin) & ACCESS_IS_SWIZZLED_AMD); return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->buffer_max), need_nuw);