From e1a692f74b943bf708c70f1022c98c877d64edd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Thu, 18 Sep 2025 10:09:56 +0200 Subject: [PATCH] nir/opt_offsets: allow for unsigned wraps when folding load/store_shared2_amd offsets Totals from 131 (0.16% of 79839) affected shaders: (Navi48) Instrs: 217026 -> 216541 (-0.22%); split: -0.24%, +0.01% CodeSize: 1150136 -> 1146772 (-0.29%); split: -0.31%, +0.02% Latency: 4225732 -> 4225549 (-0.00%); split: -0.01%, +0.00% InvThroughput: 840231 -> 839823 (-0.05%); split: -0.05%, +0.00% VClause: 3815 -> 3816 (+0.03%) Copies: 15414 -> 15358 (-0.36%); split: -0.38%, +0.02% PreSGPRs: 6322 -> 6323 (+0.02%) PreVGPRs: 6064 -> 6062 (-0.03%) VALU: 117317 -> 116873 (-0.38%); split: -0.40%, +0.02% SALU: 25384 -> 25331 (-0.21%); split: -0.22%, +0.02% Part-of: --- src/compiler/nir/nir_opt_offsets.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/compiler/nir/nir_opt_offsets.c b/src/compiler/nir/nir_opt_offsets.c index d6ae47bcd19..8965214b45a 100644 --- a/src/compiler/nir/nir_opt_offsets.c +++ b/src/compiler/nir/nir_opt_offsets.c @@ -180,23 +180,20 @@ try_fold_shared2(nir_builder *b, bool is_load = intrin->intrinsic == nir_intrinsic_load_shared2_amd; unsigned comp_size = (is_load ? intrin->def.bit_size : intrin->src[0].ssa->bit_size) / 8; unsigned stride = (nir_intrinsic_st64(intrin) ? 64 : 1) * comp_size; - unsigned offset0 = nir_intrinsic_offset0(intrin) * stride; - unsigned offset1 = nir_intrinsic_offset1(intrin) * stride; + uint32_t offset0 = nir_intrinsic_offset0(intrin) * stride; + uint32_t offset1 = nir_intrinsic_offset1(intrin) * stride; nir_src *off_src = &intrin->src[offset_src_idx]; uint32_t const_offset = 0; nir_scalar replace_src = { NULL, 0 }; bool modified_shader = false; if (!nir_src_is_const(*off_src)) { - opt_offsets_state state2 = *state; - state2.progress = false; - - uint32_t max = UINT32_MAX - MAX2(offset0, offset1); + uint32_t max = INT32_MAX - MAX2(offset0, offset1); /* Avoid negative offsets. */ replace_src = nir_get_scalar(off_src->ssa, 0); - if (!try_extract_const_addition(b, state, &replace_src, &const_offset, max, true)) + if (!try_extract_const_addition(b, state, &replace_src, &const_offset, max, false)) return false; - modified_shader = state2.progress; + modified_shader = true; } else { const_offset = nir_src_as_uint(*off_src); }