nak: replace get_io_addr_offset with nir_opt_offsets

Totals:
CodeSize: 9521188272 -> 9474779520 (-0.49%); split: -0.50%, +0.01%
Number of GPRs: 47361498 -> 47340754 (-0.04%); split: -0.05%, +0.00%
SLM Size: 5444552 -> 5444436 (-0.00%)
Static cycle count: 6182267636 -> 6141873245 (-0.65%); split: -0.69%, +0.03%
Spills to memory: 44288 -> 44241 (-0.11%)
Fills from memory: 44288 -> 44241 (-0.11%)
Spills to reg: 185307 -> 185246 (-0.03%); split: -0.06%, +0.03%
Fills from reg: 225943 -> 225895 (-0.02%); split: -0.04%, +0.01%
Max warps/SM: 50637496 -> 50646924 (+0.02%); split: +0.02%, -0.00%

Totals from 118675 (10.20% of 1163204) affected shaders:
CodeSize: 2675917792 -> 2629509040 (-1.73%); split: -1.77%, +0.04%
Number of GPRs: 7190170 -> 7169426 (-0.29%); split: -0.32%, +0.03%
SLM Size: 2694216 -> 2694100 (-0.00%)
Static cycle count: 3780817453 -> 3740423062 (-1.07%); split: -1.12%, +0.05%
Spills to memory: 40938 -> 40891 (-0.11%)
Fills from memory: 40938 -> 40891 (-0.11%)
Spills to reg: 78989 -> 78928 (-0.08%); split: -0.14%, +0.06%
Fills from reg: 83274 -> 83226 (-0.06%); split: -0.10%, +0.04%
Max warps/SM: 4219736 -> 4229164 (+0.22%); split: +0.23%, -0.01%

Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39525>
This commit is contained in:
Karol Herbst 2026-01-25 13:27:49 +01:00 committed by Marge Bot
parent e5bf1f5aff
commit 5890aedf8c
4 changed files with 40 additions and 131 deletions

View file

@ -460,35 +460,6 @@ impl<'a> ShaderFromNir<'a> {
self.get_ssa_ref(src).into()
}
/// Splits a NIR address into an SSA base plus an immediate offset that
/// fits in `imm_bits` bits.
///
/// Delegates the actual pattern-matching to the C helper
/// `nak_get_io_addr_offset`. When the helper finds no SSA base (the whole
/// address folded to an immediate), the base is `SrcRef::Zero`.
fn get_io_addr_offset(
    &mut self,
    addr: &nir_src,
    imm_bits: u8,
) -> (Src, i32) {
    let def = addr.as_def();
    // SAFETY: `def` is a valid nir_def borrowed from `addr`; the helper
    // only reads it, despite taking a *mut pointer.
    let split = unsafe {
        nak_get_io_addr_offset(def as *const _ as *mut _, imm_bits)
    };
    match std::ptr::NonNull::new(split.base.def) {
        Some(base_ptr) => {
            // SAFETY: a non-null base def returned by the helper refers to
            // a live nir_def in this shader.
            let base_def = unsafe { base_ptr.as_ref() };
            let comp = u8::try_from(split.base.comp).unwrap();
            let (base_ssa, _) = self.get_ssa_comp(base_def, comp);
            (base_ssa.into(), split.offset)
        }
        // No SSA base: address is purely the immediate offset.
        None => (SrcRef::Zero.into(), split.offset),
    }
}
/// Splits a cbuf address into a base source plus a 16-bit immediate
/// offset.
///
/// Cbuf accesses only encode unsigned 16-bit immediates, so if the
/// offset found by [`Self::get_io_addr_offset`] does not fit in a `u16`
/// we fall back to the whole address as the base with a zero immediate.
fn get_cbuf_addr_offset(&mut self, addr: &nir_src) -> (Src, u16) {
    let (base, imm) = self.get_io_addr_offset(addr, 16);
    match u16::try_from(imm) {
        Ok(imm_u16) => (base, imm_u16),
        Err(_) => (self.get_src(addr), 0),
    }
}
/// Records `ssa` as the value produced for the NIR def `def`.
fn set_dst(&mut self, def: &nir_def, ssa: SSARef) {
    self.set_ssa(def, (*ssa).into());
}
@ -2971,7 +2942,7 @@ impl<'a> ShaderFromNir<'a> {
}
nir_intrinsic_global_atomic_nv => {
let bit_size = intrin.def.bit_size();
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let addr = self.get_src(&srcs[0]);
let data = self.get_src(&srcs[1]);
let atom_type = self.get_atomic_type(intrin);
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
@ -2993,7 +2964,7 @@ impl<'a> ShaderFromNir<'a> {
data: data,
atom_op: atom_op,
atom_type: atom_type,
addr_offset: offset,
addr_offset: intrin.base(),
mem_space: MemSpace::Global(MemAddrType::A64),
mem_order: MemOrder::Strong(MemScope::GPU),
mem_eviction_priority: MemEvictionPriority::Normal, // Note: no intrinsic access
@ -3003,7 +2974,7 @@ impl<'a> ShaderFromNir<'a> {
nir_intrinsic_global_atomic_swap_nv => {
assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
let bit_size = intrin.def.bit_size();
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let addr = self.get_src(&srcs[0]);
let cmpr = self.get_src(&srcs[1]);
let data = self.get_src(&srcs[2]);
let atom_type = AtomType::U(bit_size);
@ -3018,7 +2989,7 @@ impl<'a> ShaderFromNir<'a> {
data: data,
atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
atom_type: atom_type,
addr_offset: offset,
addr_offset: intrin.base(),
mem_space: MemSpace::Global(MemAddrType::A64),
mem_order: MemOrder::Strong(MemScope::GPU),
mem_eviction_priority: MemEvictionPriority::Normal, // Note: no intrinsic access
@ -3093,12 +3064,11 @@ impl<'a> ShaderFromNir<'a> {
}
nir_intrinsic_vild_nv => {
let dst = b.alloc_ssa(RegFile::GPR);
let (idx, off) = self.get_io_addr_offset(&srcs[0], 8);
let idx = self.get_src(&srcs[0]);
b.push_op(OpViLd {
dst: dst.into(),
idx,
off: off.try_into().unwrap(),
off: intrin.base().try_into().unwrap(),
});
self.set_dst(&intrin.def, dst.into());
}
@ -3122,13 +3092,13 @@ impl<'a> ShaderFromNir<'a> {
eviction_priority: self
.get_eviction_priority(intrin.access()),
};
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let addr = self.get_src(&srcs[0]);
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
b.push_op(OpLd {
dst: dst.clone().into(),
addr: addr,
offset: offset,
offset: intrin.base(),
access: access,
});
self.set_dst(&intrin.def, dst);
@ -3231,13 +3201,13 @@ impl<'a> ShaderFromNir<'a> {
order: MemOrder::Strong(MemScope::CTA),
eviction_priority: MemEvictionPriority::Normal,
};
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let addr = self.get_src(&srcs[0]);
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
b.push_op(OpLd {
dst: dst.clone().into(),
addr: addr,
offset: offset,
offset: intrin.base(),
access: access,
});
self.set_dst(&intrin.def, dst);
@ -3252,14 +3222,13 @@ impl<'a> ShaderFromNir<'a> {
order: MemOrder::Strong(MemScope::CTA),
eviction_priority: MemEvictionPriority::Normal,
};
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let offset = offset + intrin.base();
let addr = self.get_src(&srcs[0]);
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
b.push_op(OpLd {
dst: dst.clone().into(),
addr: addr,
offset: offset,
offset: intrin.base(),
access: access,
});
self.set_dst(&intrin.def, dst);
@ -3268,7 +3237,7 @@ impl<'a> ShaderFromNir<'a> {
let size_B = intrin.def.bit_size() / 8;
let mem_type = MemType::from_size(size_B, false);
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let addr = self.get_src(&srcs[0]);
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
let locked = b.alloc_ssa(RegFile::Pred);
@ -3276,7 +3245,7 @@ impl<'a> ShaderFromNir<'a> {
dst: dst.clone().into(),
locked: locked.into(),
addr,
offset,
offset: intrin.base(),
mem_type,
});
let locked_gpr = b.sel(locked.into(), 1.into(), 0.into());
@ -3319,7 +3288,8 @@ impl<'a> ShaderFromNir<'a> {
(intrin.def.bit_size() / 8) * intrin.def.num_components();
let idx = &srcs[0];
let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]);
let off = self.get_src(&srcs[1]);
let off_imm = intrin.base() as u16;
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
@ -3329,7 +3299,7 @@ impl<'a> ShaderFromNir<'a> {
buf: CBuf::Binding(idx_imm),
offset: off_imm,
};
if off.is_zero() {
if srcs[1].is_zero() {
for (i, comp) in dst.iter().enumerate() {
let i = u16::try_from(i).unwrap();
b.copy_to(
@ -3376,15 +3346,15 @@ impl<'a> ShaderFromNir<'a> {
(intrin.def.bit_size() / 8) * intrin.def.num_components();
let handle = self.get_ssa_ref(&srcs[0]);
let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]);
let off = self.get_src(&srcs[1]);
let cb = CBufRef {
buf: CBuf::BindlessSSA(handle[..].try_into().unwrap()),
offset: off_imm,
offset: intrin.base() as u16,
};
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
if off.is_zero() {
if srcs[1].is_zero() {
for (i, comp) in dst.iter().enumerate() {
let i = u16::try_from(i).unwrap();
b.copy_to(
@ -3565,7 +3535,7 @@ impl<'a> ShaderFromNir<'a> {
}
nir_intrinsic_shared_atomic_nv => {
let bit_size = intrin.def.bit_size();
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let addr = self.get_src(&srcs[0]);
let data = self.get_src(&srcs[1]);
let atom_type = self.get_atomic_type(intrin);
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
@ -3580,7 +3550,7 @@ impl<'a> ShaderFromNir<'a> {
data: data,
atom_op: atom_op,
atom_type: atom_type,
addr_offset: offset,
addr_offset: intrin.base(),
mem_space: MemSpace::Shared,
mem_order: MemOrder::Strong(MemScope::CTA),
mem_eviction_priority: MemEvictionPriority::Normal,
@ -3590,7 +3560,7 @@ impl<'a> ShaderFromNir<'a> {
nir_intrinsic_shared_atomic_swap_nv => {
assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
let bit_size = intrin.def.bit_size();
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let addr = self.get_src(&srcs[0]);
let cmpr = self.get_src(&srcs[1]);
let data = self.get_src(&srcs[2]);
let atom_type = AtomType::U(bit_size);
@ -3605,7 +3575,7 @@ impl<'a> ShaderFromNir<'a> {
data: data,
atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
atom_type: atom_type,
addr_offset: offset,
addr_offset: intrin.base(),
mem_space: MemSpace::Shared,
mem_order: MemOrder::Strong(MemScope::CTA),
mem_eviction_priority: MemEvictionPriority::Normal,
@ -3628,12 +3598,12 @@ impl<'a> ShaderFromNir<'a> {
eviction_priority: self
.get_eviction_priority(intrin.access()),
};
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
let addr = self.get_src(&srcs[1]);
b.push_op(OpSt {
addr: addr,
data: data,
offset: offset,
offset: intrin.base(),
access: access,
});
}
@ -3658,12 +3628,12 @@ impl<'a> ShaderFromNir<'a> {
order: MemOrder::Strong(MemScope::CTA),
eviction_priority: MemEvictionPriority::Normal,
};
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
let addr = self.get_src(&srcs[1]);
b.push_op(OpSt {
addr: addr,
data: data,
offset: offset,
offset: intrin.base(),
access: access,
});
}
@ -3678,13 +3648,12 @@ impl<'a> ShaderFromNir<'a> {
order: MemOrder::Strong(MemScope::CTA),
eviction_priority: MemEvictionPriority::Normal,
};
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
let offset = offset + intrin.base();
let addr = self.get_src(&srcs[1]);
b.push_op(OpSt {
addr: addr,
data: data,
offset: offset,
offset: intrin.base(),
access: access,
});
}
@ -3694,14 +3663,14 @@ impl<'a> ShaderFromNir<'a> {
(srcs[0].bit_size() / 8) * srcs[0].num_components();
let mem_type = MemType::from_size(size_B, false);
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
let addr = self.get_src(&srcs[1]);
let locked = b.alloc_ssa(RegFile::Pred);
b.push_op(OpStSCheckUnlock {
locked: locked.into(),
addr,
data,
offset,
offset: intrin.base(),
mem_type,
});
let locked_gpr = b.sel(locked.into(), 1.into(), 0.into());
@ -3795,14 +3764,13 @@ impl<'a> ShaderFromNir<'a> {
LdsmSize::M8N8
};
let dst = b.alloc_ssa_vec(RegFile::GPR, comps);
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let offset = offset + intrin.base();
let addr = self.get_src(&srcs[0]);
b.push_op(OpLdsm {
dst: dst.clone().into(),
mat_size,
mat_count,
addr,
offset,
offset: intrin.base(),
});
self.set_dst(&intrin.def, dst);
}

View file

@ -1058,6 +1058,9 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak)
return progress;
}
/* Options for nir_opt_offsets; the empty initializer zero-fills every
 * field.  NOTE(review): confirm that all-zero limits match the immediate
 * offset ranges NAK's load/store encodings actually support.
 */
const static struct nir_opt_offsets_options nak_offset_options = {
};
void
nak_postprocess_nir(nir_shader *nir,
const struct nak_compiler *nak,
@ -1194,6 +1197,7 @@ nak_postprocess_nir(nir_shader *nir,
}
OPT(nir, nak_nir_lower_load_store, nak);
OPT(nir, nir_opt_offsets, &nak_offset_options);
OPT(nir, nir_lower_doubles, NULL, nak->nir_options.lower_doubles_options);
OPT(nir, nir_lower_int64);
@ -1282,59 +1286,3 @@ nak_postprocess_nir(nir_shader *nir,
nir_print_shader(nir, stderr);
}
}
/* Returns true if x is a constant that fits in an imm_bits-bit immediate.
 *
 * Signed immediates must lie in [u_intN_min(bits), u_intN_max(bits)];
 * unsigned immediates must not exceed u_uintN_max(bits).  Non-constant
 * scalars never qualify.
 */
static bool
scalar_is_imm_int(nir_scalar x, unsigned bits, bool is_signed)
{
   if (!nir_scalar_is_const(x))
      return false;

   if (is_signed) {
      int64_t imm = nir_scalar_as_int(x);
      return u_intN_min(bits) <= imm && imm <= u_intN_max(bits);
   } else {
      /* Fix: '<=' so the maximum representable value is accepted.  The
       * signed branch above uses inclusive bounds; the original '<' here
       * rejected an in-range immediate (off-by-one).
       */
      return nir_scalar_as_uint(x) <= u_uintN_max(bits);
   }
}
/* Splits an address into an SSA base plus an immediate offset fitting in
 * imm_bits bits.
 *
 * Three cases, in order:
 *  1. The whole address is a constant: returned as an unsigned immediate
 *     with a NULL base.
 *  2. The address is an iadd with one constant source: that constant is
 *     returned as a signed immediate and the other source as the base.
 *  3. Otherwise: the (mov-chased) address itself is the base, offset 0.
 */
struct nak_io_addr_offset
nak_get_io_addr_offset(nir_def *addr, uint8_t imm_bits)
{
   nir_scalar addr_s = {
      .def = addr,
      .comp = 0,
   };

   /* If the entire address is constant, it's an unsigned immediate */
   if (scalar_is_imm_int(addr_s, imm_bits, false)) {
      /* Base is a dumb name for this. It should be offset */
      return (struct nak_io_addr_offset) {
         .offset = nir_scalar_as_int(addr_s),
      };
   }

   /* Look through mov chains before pattern-matching on the ALU op */
   addr_s = nir_scalar_chase_movs(addr_s);
   if (!nir_scalar_is_alu(addr_s) ||
       nir_scalar_alu_op(addr_s) != nir_op_iadd) {
      return (struct nak_io_addr_offset) {
         .base = addr_s,
      };
   }

   /* iadd is commutative: try the constant on either side */
   for (unsigned i = 0; i < 2; i++) {
      nir_scalar off_s = nir_scalar_chase_alu_src(addr_s, i);
      off_s = nir_scalar_chase_movs(off_s);

      /* If it's imm+indirect then the immediate is signed */
      if (scalar_is_imm_int(off_s, imm_bits, true)) {
         return (struct nak_io_addr_offset) {
            .base = nir_scalar_chase_alu_src(addr_s, 1 - i),
            .offset = nir_scalar_as_int(off_s),
         };
      }
   }

   /* iadd with no suitable constant source: whole thing is the base */
   return (struct nak_io_addr_offset) {
      .base = addr_s,
   };
}

View file

@ -65,7 +65,8 @@ lower_ldcx_to_global(nir_builder *b, nir_intrinsic_instr *load,
nir_iadd(b, addr, nir_u2u64(b, offset)),
.align_mul = nir_intrinsic_align_mul(load),
.align_offset = nir_intrinsic_align_offset(load),
.access = ACCESS_CAN_REORDER);
.access = ACCESS_CAN_REORDER,
.base = nir_intrinsic_base(load));
}
nir_pop_if(b, NULL);
val = nir_if_phi(b, val, zero);

View file

@ -140,14 +140,6 @@ struct nak_xfb_info
nak_xfb_from_nir(const struct nak_compiler *nak,
const struct nir_xfb_info *nir_xfb);
/* A NIR address split into an SSA base plus a constant offset.
 *
 * When the whole address folded to an immediate, base.def is NULL and
 * offset holds the (unsigned) value; otherwise offset is the signed
 * immediate folded out of an iadd, or 0 if none was found.
 */
struct nak_io_addr_offset {
   nir_scalar base;
   int32_t offset;
};

/* Splits addr into base + offset where offset fits in imm_bits bits. */
struct nak_io_addr_offset
nak_get_io_addr_offset(nir_def *addr, uint8_t imm_bits);
enum nak_nir_tex_ref_type {
/** Indicates that this is a bindless texture */
NAK_NIR_TEX_REF_TYPE_BINDLESS,