From 7f3900ed20fd00e807aa64dceb8f644fecc106cb Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Tue, 14 Apr 2026 10:53:21 +0100
Subject: [PATCH] ac/nir_lower_global_access: perform range analysis if useful

fossil-db (navi31):
Totals from 1197 (0.59% of 202426) affected shaders:
Instrs: 2117283 -> 2108380 (-0.42%); split: -0.47%, +0.05%
CodeSize: 11183776 -> 11140060 (-0.39%); split: -0.42%, +0.03%
Latency: 13568247 -> 13648044 (+0.59%); split: -0.13%, +0.72%
InvThroughput: 2389746 -> 2376716 (-0.55%); split: -0.63%, +0.09%
VClause: 43337 -> 43138 (-0.46%); split: -0.51%, +0.05%
SClause: 31035 -> 31027 (-0.03%); split: -0.22%, +0.20%
Copies: 227528 -> 227002 (-0.23%); split: -0.59%, +0.36%
Branches: 29393 -> 29392 (-0.00%); split: -0.01%, +0.00%
PreSGPRs: 64238 -> 64336 (+0.15%)
PreVGPRs: 70480 -> 70468 (-0.02%)
VALU: 1387439 -> 1379274 (-0.59%); split: -0.59%, +0.00%
SALU: 185514 -> 185382 (-0.07%); split: -0.56%, +0.49%
VOPD: 4425 -> 4400 (-0.56%); split: +0.66%, -1.22%

fossil-db (navi21):
Totals from 1197 (0.59% of 202427) affected shaders:
Instrs: 1987004 -> 1974920 (-0.61%); split: -0.64%, +0.03%
CodeSize: 10803928 -> 10745204 (-0.54%); split: -0.56%, +0.01%
VGPRs: 83848 -> 83856 (+0.01%); split: -0.01%, +0.02%
SpillSGPRs: 9843 -> 9861 (+0.18%)
Latency: 14518481 -> 14534898 (+0.11%); split: -0.17%, +0.29%
InvThroughput: 3712336 -> 3698081 (-0.38%); split: -0.52%, +0.13%
VClause: 52677 -> 52546 (-0.25%); split: -0.51%, +0.27%
SClause: 31113 -> 31050 (-0.20%); split: -0.37%, +0.17%
Copies: 219723 -> 218017 (-0.78%); split: -1.05%, +0.27%
Branches: 33717 -> 33716 (-0.00%); split: -0.01%, +0.00%
PreSGPRs: 66364 -> 66480 (+0.17%)
PreVGPRs: 71048 -> 71036 (-0.02%)
VALU: 1442585 -> 1431331 (-0.78%); split: -0.78%, +0.00%
SALU: 209617 -> 208517 (-0.52%); split: -0.88%, +0.35%

Signed-off-by: Rhys Perry
Reviewed-by: Georg Lehmann
Part-of:
---
 .../common/nir/ac_nir_lower_global_access.c | 52 +++++++++++++++----
 1 file changed, 42 insertions(+), 10 deletions(-)

diff --git
a/src/amd/common/nir/ac_nir_lower_global_access.c b/src/amd/common/nir/ac_nir_lower_global_access.c index 48d6ec47ca7..73818b036d1 100644 --- a/src/amd/common/nir/ac_nir_lower_global_access.c +++ b/src/amd/common/nir/ac_nir_lower_global_access.c @@ -14,15 +14,42 @@ is_u2u64(nir_scalar scalar) return nir_scalar_is_alu(scalar) && nir_scalar_alu_op(scalar) == nir_op_u2u64; } +typedef struct { + nir_shader *shader; + struct hash_table *range_ht; +} lower_state; + +static bool +is_nuw(lower_state *state, nir_scalar scalar) +{ + assert(scalar.def->bit_size == 32); + nir_alu_instr *alu = nir_def_as_alu(scalar.def); + if (alu->no_unsigned_wrap) + return true; + + if (!state->range_ht) + state->range_ht = _mesa_pointer_hash_table_create(NULL); + + nir_scalar src0 = nir_scalar_chase_alu_src(scalar, 0); + nir_scalar src1 = nir_scalar_chase_alu_src(scalar, 1); + uint32_t ub0 = nir_unsigned_upper_bound(state->shader, state->range_ht, src0); + uint32_t ub1 = nir_unsigned_upper_bound(state->shader, state->range_ht, src1); + if ((UINT32_MAX - ub0) < ub1) + return false; + + alu->no_unsigned_wrap = true; + return true; +} + static nir_def * -try_extract_additions(nir_builder *b, nir_scalar scalar, uint64_t *out_const, +try_extract_additions(lower_state *state, nir_builder *b, nir_scalar scalar, uint64_t *out_const, nir_def **out_offset, bool require_nuw) { if (!nir_scalar_is_alu(scalar) || nir_scalar_alu_op(scalar) != nir_op_iadd) return NULL; nir_alu_instr *alu = nir_def_as_alu(scalar.def); - if (require_nuw && !alu->no_unsigned_wrap) + if (require_nuw && !is_nuw(state, scalar)) return NULL; nir_scalar src0 = nir_scalar_chase_alu_src(scalar, 0); @@ -38,19 +65,19 @@ try_extract_additions(nir_builder *b, nir_scalar scalar, uint64_t *out_const, continue; *out_offset = nir_mov_scalar(b, offset_scalar); - nir_def *replace_offset = try_extract_additions(b, offset_scalar, out_const, out_offset, true); + nir_def *replace_offset = try_extract_additions(state, b, offset_scalar, out_const, 
out_offset, true); *out_offset = replace_offset ? replace_offset : *out_offset; } else { continue; } nir_def *replace_src = - try_extract_additions(b, i == 1 ? src0 : src1, out_const, out_offset, require_nuw); + try_extract_additions(state, b, i == 1 ? src0 : src1, out_const, out_offset, require_nuw); return replace_src ? replace_src : nir_ssa_for_alu_src(b, alu, 1 - i); } - nir_def *replace_src0 = try_extract_additions(b, src0, out_const, out_offset, require_nuw); - nir_def *replace_src1 = try_extract_additions(b, src1, out_const, out_offset, require_nuw); + nir_def *replace_src0 = try_extract_additions(state, b, src0, out_const, out_offset, require_nuw); + nir_def *replace_src1 = try_extract_additions(state, b, src1, out_const, out_offset, require_nuw); if (!replace_src0 && !replace_src1) return NULL; @@ -60,7 +87,7 @@ try_extract_additions(nir_builder *b, nir_scalar scalar, uint64_t *out_const, } static bool -process_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *_) +process_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state) { nir_intrinsic_op op; unsigned access = 0; @@ -92,7 +119,7 @@ process_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *_) nir_def *offset = NULL; nir_scalar src = {addr_src->ssa, 0}; b->cursor = nir_after_def(addr_src->ssa); - nir_def *addr = try_extract_additions(b, src, &off_const, &offset, false); + nir_def *addr = try_extract_additions(state, b, src, &off_const, &offset, false); addr = addr ? 
addr : addr_src->ssa; b->cursor = nir_before_instr(&intrin->instr); @@ -139,6 +166,11 @@ process_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *_) bool ac_nir_lower_global_access(nir_shader *shader) { - return nir_shader_intrinsics_pass(shader, process_instr, - nir_metadata_control_flow, NULL); + lower_state state; + state.shader = shader; + state.range_ht = NULL; + bool progress = nir_shader_intrinsics_pass(shader, process_instr, + nir_metadata_control_flow, &state); + _mesa_hash_table_destroy(state.range_ht, NULL); + return progress; }