mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-22 03:00:35 +01:00
nak: replace get_io_addr_offset with nir_opt_offsets
Totals:
CodeSize: 9521188272 -> 9474779520 (-0.49%); split: -0.50%, +0.01%
Number of GPRs: 47361498 -> 47340754 (-0.04%); split: -0.05%, +0.00%
SLM Size: 5444552 -> 5444436 (-0.00%)
Static cycle count: 6182267636 -> 6141873245 (-0.65%); split: -0.69%, +0.03%
Spills to memory: 44288 -> 44241 (-0.11%)
Fills from memory: 44288 -> 44241 (-0.11%)
Spills to reg: 185307 -> 185246 (-0.03%); split: -0.06%, +0.03%
Fills from reg: 225943 -> 225895 (-0.02%); split: -0.04%, +0.01%
Max warps/SM: 50637496 -> 50646924 (+0.02%); split: +0.02%, -0.00%

Totals from 118675 (10.20% of 1163204) affected shaders:
CodeSize: 2675917792 -> 2629509040 (-1.73%); split: -1.77%, +0.04%
Number of GPRs: 7190170 -> 7169426 (-0.29%); split: -0.32%, +0.03%
SLM Size: 2694216 -> 2694100 (-0.00%)
Static cycle count: 3780817453 -> 3740423062 (-1.07%); split: -1.12%, +0.05%
Spills to memory: 40938 -> 40891 (-0.11%)
Fills from memory: 40938 -> 40891 (-0.11%)
Spills to reg: 78989 -> 78928 (-0.08%); split: -0.14%, +0.06%
Fills from reg: 83274 -> 83226 (-0.06%); split: -0.10%, +0.04%
Max warps/SM: 4219736 -> 4229164 (+0.22%); split: +0.23%, -0.01%

Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39525>
This commit is contained in:
parent
e5bf1f5aff
commit
5890aedf8c
4 changed files with 40 additions and 131 deletions
|
|
@ -460,35 +460,6 @@ impl<'a> ShaderFromNir<'a> {
|
|||
self.get_ssa_ref(src).into()
|
||||
}
|
||||
|
||||
fn get_io_addr_offset(
|
||||
&mut self,
|
||||
addr: &nir_src,
|
||||
imm_bits: u8,
|
||||
) -> (Src, i32) {
|
||||
let addr = addr.as_def();
|
||||
let addr_offset = unsafe {
|
||||
nak_get_io_addr_offset(addr as *const _ as *mut _, imm_bits)
|
||||
};
|
||||
|
||||
if let Some(base_def) = std::ptr::NonNull::new(addr_offset.base.def) {
|
||||
let base_def = unsafe { base_def.as_ref() };
|
||||
let base_comp = u8::try_from(addr_offset.base.comp).unwrap();
|
||||
let (base, _) = self.get_ssa_comp(base_def, base_comp);
|
||||
(base.into(), addr_offset.offset)
|
||||
} else {
|
||||
(SrcRef::Zero.into(), addr_offset.offset)
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits a constant-buffer address into a base source and a `u16`
/// immediate offset.
///
/// Asks `get_io_addr_offset` for a 16-bit immediate. If the immediate it
/// returns cannot be represented as a `u16`, the split is discarded and the
/// full address is returned as the source with an immediate of zero.
fn get_cbuf_addr_offset(&mut self, addr: &nir_src) -> (Src, u16) {
    let (base, imm) = self.get_io_addr_offset(addr, 16);
    match u16::try_from(imm) {
        Ok(imm) => (base, imm),
        // Immediate is out of range for u16: use the unsplit address.
        Err(_) => (self.get_src(addr), 0),
    }
}
|
||||
|
||||
/// Records `ssa` as the value of the NIR definition `def` by forwarding
/// its dereferenced contents to `set_ssa`.
fn set_dst(&mut self, def: &nir_def, ssa: SSARef) {
    let values = (*ssa).into();
    self.set_ssa(def, values);
}
|
||||
|
|
@ -2971,7 +2942,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
nir_intrinsic_global_atomic_nv => {
|
||||
let bit_size = intrin.def.bit_size();
|
||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
||||
let addr = self.get_src(&srcs[0]);
|
||||
let data = self.get_src(&srcs[1]);
|
||||
let atom_type = self.get_atomic_type(intrin);
|
||||
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
|
||||
|
|
@ -2993,7 +2964,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
data: data,
|
||||
atom_op: atom_op,
|
||||
atom_type: atom_type,
|
||||
addr_offset: offset,
|
||||
addr_offset: intrin.base(),
|
||||
mem_space: MemSpace::Global(MemAddrType::A64),
|
||||
mem_order: MemOrder::Strong(MemScope::GPU),
|
||||
mem_eviction_priority: MemEvictionPriority::Normal, // Note: no intrinic access
|
||||
|
|
@ -3003,7 +2974,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
nir_intrinsic_global_atomic_swap_nv => {
|
||||
assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
|
||||
let bit_size = intrin.def.bit_size();
|
||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
||||
let addr = self.get_src(&srcs[0]);
|
||||
let cmpr = self.get_src(&srcs[1]);
|
||||
let data = self.get_src(&srcs[2]);
|
||||
let atom_type = AtomType::U(bit_size);
|
||||
|
|
@ -3018,7 +2989,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
data: data,
|
||||
atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
|
||||
atom_type: atom_type,
|
||||
addr_offset: offset,
|
||||
addr_offset: intrin.base(),
|
||||
mem_space: MemSpace::Global(MemAddrType::A64),
|
||||
mem_order: MemOrder::Strong(MemScope::GPU),
|
||||
mem_eviction_priority: MemEvictionPriority::Normal, // Note: no intrinic access
|
||||
|
|
@ -3093,12 +3064,11 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
nir_intrinsic_vild_nv => {
|
||||
let dst = b.alloc_ssa(RegFile::GPR);
|
||||
|
||||
let (idx, off) = self.get_io_addr_offset(&srcs[0], 8);
|
||||
let idx = self.get_src(&srcs[0]);
|
||||
b.push_op(OpViLd {
|
||||
dst: dst.into(),
|
||||
idx,
|
||||
off: off.try_into().unwrap(),
|
||||
off: intrin.base().try_into().unwrap(),
|
||||
});
|
||||
self.set_dst(&intrin.def, dst.into());
|
||||
}
|
||||
|
|
@ -3122,13 +3092,13 @@ impl<'a> ShaderFromNir<'a> {
|
|||
eviction_priority: self
|
||||
.get_eviction_priority(intrin.access()),
|
||||
};
|
||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
||||
let addr = self.get_src(&srcs[0]);
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
||||
|
||||
b.push_op(OpLd {
|
||||
dst: dst.clone().into(),
|
||||
addr: addr,
|
||||
offset: offset,
|
||||
offset: intrin.base(),
|
||||
access: access,
|
||||
});
|
||||
self.set_dst(&intrin.def, dst);
|
||||
|
|
@ -3231,13 +3201,13 @@ impl<'a> ShaderFromNir<'a> {
|
|||
order: MemOrder::Strong(MemScope::CTA),
|
||||
eviction_priority: MemEvictionPriority::Normal,
|
||||
};
|
||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
||||
let addr = self.get_src(&srcs[0]);
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
||||
|
||||
b.push_op(OpLd {
|
||||
dst: dst.clone().into(),
|
||||
addr: addr,
|
||||
offset: offset,
|
||||
offset: intrin.base(),
|
||||
access: access,
|
||||
});
|
||||
self.set_dst(&intrin.def, dst);
|
||||
|
|
@ -3252,14 +3222,13 @@ impl<'a> ShaderFromNir<'a> {
|
|||
order: MemOrder::Strong(MemScope::CTA),
|
||||
eviction_priority: MemEvictionPriority::Normal,
|
||||
};
|
||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
||||
let offset = offset + intrin.base();
|
||||
let addr = self.get_src(&srcs[0]);
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
||||
|
||||
b.push_op(OpLd {
|
||||
dst: dst.clone().into(),
|
||||
addr: addr,
|
||||
offset: offset,
|
||||
offset: intrin.base(),
|
||||
access: access,
|
||||
});
|
||||
self.set_dst(&intrin.def, dst);
|
||||
|
|
@ -3268,7 +3237,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
let size_B = intrin.def.bit_size() / 8;
|
||||
let mem_type = MemType::from_size(size_B, false);
|
||||
|
||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
||||
let addr = self.get_src(&srcs[0]);
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
||||
let locked = b.alloc_ssa(RegFile::Pred);
|
||||
|
||||
|
|
@ -3276,7 +3245,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
dst: dst.clone().into(),
|
||||
locked: locked.into(),
|
||||
addr,
|
||||
offset,
|
||||
offset: intrin.base(),
|
||||
mem_type,
|
||||
});
|
||||
let locked_gpr = b.sel(locked.into(), 1.into(), 0.into());
|
||||
|
|
@ -3319,7 +3288,8 @@ impl<'a> ShaderFromNir<'a> {
|
|||
(intrin.def.bit_size() / 8) * intrin.def.num_components();
|
||||
let idx = &srcs[0];
|
||||
|
||||
let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]);
|
||||
let off = self.get_src(&srcs[1]);
|
||||
let off_imm = intrin.base() as u16;
|
||||
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
||||
|
||||
|
|
@ -3329,7 +3299,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
buf: CBuf::Binding(idx_imm),
|
||||
offset: off_imm,
|
||||
};
|
||||
if off.is_zero() {
|
||||
if srcs[1].is_zero() {
|
||||
for (i, comp) in dst.iter().enumerate() {
|
||||
let i = u16::try_from(i).unwrap();
|
||||
b.copy_to(
|
||||
|
|
@ -3376,15 +3346,15 @@ impl<'a> ShaderFromNir<'a> {
|
|||
(intrin.def.bit_size() / 8) * intrin.def.num_components();
|
||||
|
||||
let handle = self.get_ssa_ref(&srcs[0]);
|
||||
let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]);
|
||||
let off = self.get_src(&srcs[1]);
|
||||
|
||||
let cb = CBufRef {
|
||||
buf: CBuf::BindlessSSA(handle[..].try_into().unwrap()),
|
||||
offset: off_imm,
|
||||
offset: intrin.base() as u16,
|
||||
};
|
||||
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
||||
if off.is_zero() {
|
||||
if srcs[1].is_zero() {
|
||||
for (i, comp) in dst.iter().enumerate() {
|
||||
let i = u16::try_from(i).unwrap();
|
||||
b.copy_to(
|
||||
|
|
@ -3565,7 +3535,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
nir_intrinsic_shared_atomic_nv => {
|
||||
let bit_size = intrin.def.bit_size();
|
||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
||||
let addr = self.get_src(&srcs[0]);
|
||||
let data = self.get_src(&srcs[1]);
|
||||
let atom_type = self.get_atomic_type(intrin);
|
||||
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
|
||||
|
|
@ -3580,7 +3550,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
data: data,
|
||||
atom_op: atom_op,
|
||||
atom_type: atom_type,
|
||||
addr_offset: offset,
|
||||
addr_offset: intrin.base(),
|
||||
mem_space: MemSpace::Shared,
|
||||
mem_order: MemOrder::Strong(MemScope::CTA),
|
||||
mem_eviction_priority: MemEvictionPriority::Normal,
|
||||
|
|
@ -3590,7 +3560,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
nir_intrinsic_shared_atomic_swap_nv => {
|
||||
assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
|
||||
let bit_size = intrin.def.bit_size();
|
||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
||||
let addr = self.get_src(&srcs[0]);
|
||||
let cmpr = self.get_src(&srcs[1]);
|
||||
let data = self.get_src(&srcs[2]);
|
||||
let atom_type = AtomType::U(bit_size);
|
||||
|
|
@ -3605,7 +3575,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
data: data,
|
||||
atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
|
||||
atom_type: atom_type,
|
||||
addr_offset: offset,
|
||||
addr_offset: intrin.base(),
|
||||
mem_space: MemSpace::Shared,
|
||||
mem_order: MemOrder::Strong(MemScope::CTA),
|
||||
mem_eviction_priority: MemEvictionPriority::Normal,
|
||||
|
|
@ -3628,12 +3598,12 @@ impl<'a> ShaderFromNir<'a> {
|
|||
eviction_priority: self
|
||||
.get_eviction_priority(intrin.access()),
|
||||
};
|
||||
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
|
||||
let addr = self.get_src(&srcs[1]);
|
||||
|
||||
b.push_op(OpSt {
|
||||
addr: addr,
|
||||
data: data,
|
||||
offset: offset,
|
||||
offset: intrin.base(),
|
||||
access: access,
|
||||
});
|
||||
}
|
||||
|
|
@ -3658,12 +3628,12 @@ impl<'a> ShaderFromNir<'a> {
|
|||
order: MemOrder::Strong(MemScope::CTA),
|
||||
eviction_priority: MemEvictionPriority::Normal,
|
||||
};
|
||||
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
|
||||
let addr = self.get_src(&srcs[1]);
|
||||
|
||||
b.push_op(OpSt {
|
||||
addr: addr,
|
||||
data: data,
|
||||
offset: offset,
|
||||
offset: intrin.base(),
|
||||
access: access,
|
||||
});
|
||||
}
|
||||
|
|
@ -3678,13 +3648,12 @@ impl<'a> ShaderFromNir<'a> {
|
|||
order: MemOrder::Strong(MemScope::CTA),
|
||||
eviction_priority: MemEvictionPriority::Normal,
|
||||
};
|
||||
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
|
||||
let offset = offset + intrin.base();
|
||||
let addr = self.get_src(&srcs[1]);
|
||||
|
||||
b.push_op(OpSt {
|
||||
addr: addr,
|
||||
data: data,
|
||||
offset: offset,
|
||||
offset: intrin.base(),
|
||||
access: access,
|
||||
});
|
||||
}
|
||||
|
|
@ -3694,14 +3663,14 @@ impl<'a> ShaderFromNir<'a> {
|
|||
(srcs[0].bit_size() / 8) * srcs[0].num_components();
|
||||
let mem_type = MemType::from_size(size_B, false);
|
||||
|
||||
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
|
||||
let addr = self.get_src(&srcs[1]);
|
||||
let locked = b.alloc_ssa(RegFile::Pred);
|
||||
|
||||
b.push_op(OpStSCheckUnlock {
|
||||
locked: locked.into(),
|
||||
addr,
|
||||
data,
|
||||
offset,
|
||||
offset: intrin.base(),
|
||||
mem_type,
|
||||
});
|
||||
let locked_gpr = b.sel(locked.into(), 1.into(), 0.into());
|
||||
|
|
@ -3795,14 +3764,13 @@ impl<'a> ShaderFromNir<'a> {
|
|||
LdsmSize::M8N8
|
||||
};
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, comps);
|
||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
||||
let offset = offset + intrin.base();
|
||||
let addr = self.get_src(&srcs[0]);
|
||||
b.push_op(OpLdsm {
|
||||
dst: dst.clone().into(),
|
||||
mat_size,
|
||||
mat_count,
|
||||
addr,
|
||||
offset,
|
||||
offset: intrin.base(),
|
||||
});
|
||||
self.set_dst(&intrin.def, dst);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1058,6 +1058,9 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak)
|
|||
return progress;
|
||||
}
|
||||
|
||||
/* Options handed to nir_opt_offsets from nak_postprocess_nir().  No field is
 * set explicitly, so every member takes its zero-initialized default.
 *
 * The storage-class specifier is written first: placing `static` after a
 * qualifier is an obsolescent form in C and trips -Wold-style-declaration.
 */
static const struct nir_opt_offsets_options nak_offset_options = {
};
|
||||
|
||||
void
|
||||
nak_postprocess_nir(nir_shader *nir,
|
||||
const struct nak_compiler *nak,
|
||||
|
|
@ -1194,6 +1197,7 @@ nak_postprocess_nir(nir_shader *nir,
|
|||
}
|
||||
|
||||
OPT(nir, nak_nir_lower_load_store, nak);
|
||||
OPT(nir, nir_opt_offsets, &nak_offset_options);
|
||||
|
||||
OPT(nir, nir_lower_doubles, NULL, nak->nir_options.lower_doubles_options);
|
||||
OPT(nir, nir_lower_int64);
|
||||
|
|
@ -1282,59 +1286,3 @@ nak_postprocess_nir(nir_shader *nir,
|
|||
nir_print_shader(nir, stderr);
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
scalar_is_imm_int(nir_scalar x, unsigned bits, bool is_signed)
|
||||
{
|
||||
if (!nir_scalar_is_const(x))
|
||||
return false;
|
||||
|
||||
if (is_signed) {
|
||||
int64_t imm = nir_scalar_as_int(x);
|
||||
return u_intN_min(bits) <= imm && imm <= u_intN_max(bits);
|
||||
} else {
|
||||
return nir_scalar_as_uint(x) < u_uintN_max(bits);
|
||||
}
|
||||
}
|
||||
|
||||
struct nak_io_addr_offset
|
||||
nak_get_io_addr_offset(nir_def *addr, uint8_t imm_bits)
|
||||
{
|
||||
nir_scalar addr_s = {
|
||||
.def = addr,
|
||||
.comp = 0,
|
||||
};
|
||||
|
||||
/* If the entire address is constant, it's an unsigned immediate */
|
||||
if (scalar_is_imm_int(addr_s, imm_bits, false)) {
|
||||
/* Base is a dumb name for this. It should be offset */
|
||||
return (struct nak_io_addr_offset) {
|
||||
.offset = nir_scalar_as_int(addr_s),
|
||||
};
|
||||
}
|
||||
|
||||
addr_s = nir_scalar_chase_movs(addr_s);
|
||||
if (!nir_scalar_is_alu(addr_s) ||
|
||||
nir_scalar_alu_op(addr_s) != nir_op_iadd) {
|
||||
return (struct nak_io_addr_offset) {
|
||||
.base = addr_s,
|
||||
};
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
nir_scalar off_s = nir_scalar_chase_alu_src(addr_s, i);
|
||||
off_s = nir_scalar_chase_movs(off_s);
|
||||
|
||||
/* If it's imm+indirect then the immediate is signed */
|
||||
if (scalar_is_imm_int(off_s, imm_bits, true)) {
|
||||
return (struct nak_io_addr_offset) {
|
||||
.base = nir_scalar_chase_alu_src(addr_s, 1 - i),
|
||||
.offset = nir_scalar_as_int(off_s),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return (struct nak_io_addr_offset) {
|
||||
.base = addr_s,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -65,7 +65,8 @@ lower_ldcx_to_global(nir_builder *b, nir_intrinsic_instr *load,
|
|||
nir_iadd(b, addr, nir_u2u64(b, offset)),
|
||||
.align_mul = nir_intrinsic_align_mul(load),
|
||||
.align_offset = nir_intrinsic_align_offset(load),
|
||||
.access = ACCESS_CAN_REORDER);
|
||||
.access = ACCESS_CAN_REORDER,
|
||||
.base = nir_intrinsic_base(load));
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
val = nir_if_phi(b, val, zero);
|
||||
|
|
|
|||
|
|
@ -140,14 +140,6 @@ struct nak_xfb_info
|
|||
nak_xfb_from_nir(const struct nak_compiler *nak,
|
||||
const struct nir_xfb_info *nir_xfb);
|
||||
|
||||
/* Address split returned by nak_get_io_addr_offset(): a base scalar plus an
 * immediate offset.
 */
struct nak_io_addr_offset {
|
||||
/* Non-immediate part of the address. */
nir_scalar base;
|
||||
/* Immediate part of the address, as a signed 32-bit value. */
int32_t offset;
|
||||
};
|
||||
|
||||
/* Splits addr into a base and an immediate offset; imm_bits is the width of
 * the immediate field the caller wants the offset to fit in.
 */
struct nak_io_addr_offset
|
||||
nak_get_io_addr_offset(nir_def *addr, uint8_t imm_bits);
|
||||
|
||||
enum nak_nir_tex_ref_type {
|
||||
/** Indicates that this is a bindless texture */
|
||||
NAK_NIR_TEX_REF_TYPE_BINDLESS,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue