nir/opt_offsets: allow for unsigned wraps when folding load/store_shared2_amd offsets

Totals from 131 (0.16% of 79839) affected shaders: (Navi48)

Instrs: 217026 -> 216541 (-0.22%); split: -0.24%, +0.01%
CodeSize: 1150136 -> 1146772 (-0.29%); split: -0.31%, +0.02%
Latency: 4225732 -> 4225549 (-0.00%); split: -0.01%, +0.00%
InvThroughput: 840231 -> 839823 (-0.05%); split: -0.05%, +0.00%
VClause: 3815 -> 3816 (+0.03%)
Copies: 15414 -> 15358 (-0.36%); split: -0.38%, +0.02%
PreSGPRs: 6322 -> 6323 (+0.02%)
PreVGPRs: 6064 -> 6062 (-0.03%)
VALU: 117317 -> 116873 (-0.38%); split: -0.40%, +0.02%
SALU: 25384 -> 25331 (-0.21%); split: -0.22%, +0.02%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37453>
This commit is contained in:
Daniel Schürmann 2025-09-18 10:09:56 +02:00 committed by Marge Bot
parent 27f1c671ec
commit e1a692f74b

View file

@@ -180,23 +180,20 @@ try_fold_shared2(nir_builder *b,
bool is_load = intrin->intrinsic == nir_intrinsic_load_shared2_amd;
unsigned comp_size = (is_load ? intrin->def.bit_size : intrin->src[0].ssa->bit_size) / 8;
unsigned stride = (nir_intrinsic_st64(intrin) ? 64 : 1) * comp_size;
unsigned offset0 = nir_intrinsic_offset0(intrin) * stride;
unsigned offset1 = nir_intrinsic_offset1(intrin) * stride;
uint32_t offset0 = nir_intrinsic_offset0(intrin) * stride;
uint32_t offset1 = nir_intrinsic_offset1(intrin) * stride;
nir_src *off_src = &intrin->src[offset_src_idx];
uint32_t const_offset = 0;
nir_scalar replace_src = { NULL, 0 };
bool modified_shader = false;
if (!nir_src_is_const(*off_src)) {
opt_offsets_state state2 = *state;
state2.progress = false;
uint32_t max = UINT32_MAX - MAX2(offset0, offset1);
uint32_t max = INT32_MAX - MAX2(offset0, offset1); /* Avoid negative offsets. */
replace_src = nir_get_scalar(off_src->ssa, 0);
if (!try_extract_const_addition(b, state, &replace_src, &const_offset, max, true))
if (!try_extract_const_addition(b, state, &replace_src, &const_offset, max, false))
return false;
modified_shader = state2.progress;
modified_shader = true;
} else {
const_offset = nir_src_as_uint(*off_src);
}