mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 13:28:06 +02:00
nak: replace get_io_addr_offset with nir_opt_offsets
Totals:
CodeSize: 9521188272 -> 9474779520 (-0.49%); split: -0.50%, +0.01%
Number of GPRs: 47361498 -> 47340754 (-0.04%); split: -0.05%, +0.00%
SLM Size: 5444552 -> 5444436 (-0.00%)
Static cycle count: 6182267636 -> 6141873245 (-0.65%); split: -0.69%, +0.03%
Spills to memory: 44288 -> 44241 (-0.11%)
Fills from memory: 44288 -> 44241 (-0.11%)
Spills to reg: 185307 -> 185246 (-0.03%); split: -0.06%, +0.03%
Fills from reg: 225943 -> 225895 (-0.02%); split: -0.04%, +0.01%
Max warps/SM: 50637496 -> 50646924 (+0.02%); split: +0.02%, -0.00%

Totals from 118675 (10.20% of 1163204) affected shaders:
CodeSize: 2675917792 -> 2629509040 (-1.73%); split: -1.77%, +0.04%
Number of GPRs: 7190170 -> 7169426 (-0.29%); split: -0.32%, +0.03%
SLM Size: 2694216 -> 2694100 (-0.00%)
Static cycle count: 3780817453 -> 3740423062 (-1.07%); split: -1.12%, +0.05%
Spills to memory: 40938 -> 40891 (-0.11%)
Fills from memory: 40938 -> 40891 (-0.11%)
Spills to reg: 78989 -> 78928 (-0.08%); split: -0.14%, +0.06%
Fills from reg: 83274 -> 83226 (-0.06%); split: -0.10%, +0.04%
Max warps/SM: 4219736 -> 4229164 (+0.22%); split: +0.23%, -0.01%

Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39525>
This commit is contained in:
parent
e5bf1f5aff
commit
5890aedf8c
4 changed files with 40 additions and 131 deletions
|
|
@ -460,35 +460,6 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
self.get_ssa_ref(src).into()
|
self.get_ssa_ref(src).into()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_io_addr_offset(
|
|
||||||
&mut self,
|
|
||||||
addr: &nir_src,
|
|
||||||
imm_bits: u8,
|
|
||||||
) -> (Src, i32) {
|
|
||||||
let addr = addr.as_def();
|
|
||||||
let addr_offset = unsafe {
|
|
||||||
nak_get_io_addr_offset(addr as *const _ as *mut _, imm_bits)
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(base_def) = std::ptr::NonNull::new(addr_offset.base.def) {
|
|
||||||
let base_def = unsafe { base_def.as_ref() };
|
|
||||||
let base_comp = u8::try_from(addr_offset.base.comp).unwrap();
|
|
||||||
let (base, _) = self.get_ssa_comp(base_def, base_comp);
|
|
||||||
(base.into(), addr_offset.offset)
|
|
||||||
} else {
|
|
||||||
(SrcRef::Zero.into(), addr_offset.offset)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_cbuf_addr_offset(&mut self, addr: &nir_src) -> (Src, u16) {
|
|
||||||
let (off, off_imm) = self.get_io_addr_offset(addr, 16);
|
|
||||||
if let Ok(off_imm_u16) = u16::try_from(off_imm) {
|
|
||||||
(off, off_imm_u16)
|
|
||||||
} else {
|
|
||||||
(self.get_src(addr), 0)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn set_dst(&mut self, def: &nir_def, ssa: SSARef) {
|
fn set_dst(&mut self, def: &nir_def, ssa: SSARef) {
|
||||||
self.set_ssa(def, (*ssa).into());
|
self.set_ssa(def, (*ssa).into());
|
||||||
}
|
}
|
||||||
|
|
@ -2971,7 +2942,7 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
}
|
}
|
||||||
nir_intrinsic_global_atomic_nv => {
|
nir_intrinsic_global_atomic_nv => {
|
||||||
let bit_size = intrin.def.bit_size();
|
let bit_size = intrin.def.bit_size();
|
||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
let addr = self.get_src(&srcs[0]);
|
||||||
let data = self.get_src(&srcs[1]);
|
let data = self.get_src(&srcs[1]);
|
||||||
let atom_type = self.get_atomic_type(intrin);
|
let atom_type = self.get_atomic_type(intrin);
|
||||||
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
|
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
|
||||||
|
|
@ -2993,7 +2964,7 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
data: data,
|
data: data,
|
||||||
atom_op: atom_op,
|
atom_op: atom_op,
|
||||||
atom_type: atom_type,
|
atom_type: atom_type,
|
||||||
addr_offset: offset,
|
addr_offset: intrin.base(),
|
||||||
mem_space: MemSpace::Global(MemAddrType::A64),
|
mem_space: MemSpace::Global(MemAddrType::A64),
|
||||||
mem_order: MemOrder::Strong(MemScope::GPU),
|
mem_order: MemOrder::Strong(MemScope::GPU),
|
||||||
mem_eviction_priority: MemEvictionPriority::Normal, // Note: no intrinic access
|
mem_eviction_priority: MemEvictionPriority::Normal, // Note: no intrinic access
|
||||||
|
|
@ -3003,7 +2974,7 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
nir_intrinsic_global_atomic_swap_nv => {
|
nir_intrinsic_global_atomic_swap_nv => {
|
||||||
assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
|
assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
|
||||||
let bit_size = intrin.def.bit_size();
|
let bit_size = intrin.def.bit_size();
|
||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
let addr = self.get_src(&srcs[0]);
|
||||||
let cmpr = self.get_src(&srcs[1]);
|
let cmpr = self.get_src(&srcs[1]);
|
||||||
let data = self.get_src(&srcs[2]);
|
let data = self.get_src(&srcs[2]);
|
||||||
let atom_type = AtomType::U(bit_size);
|
let atom_type = AtomType::U(bit_size);
|
||||||
|
|
@ -3018,7 +2989,7 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
data: data,
|
data: data,
|
||||||
atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
|
atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
|
||||||
atom_type: atom_type,
|
atom_type: atom_type,
|
||||||
addr_offset: offset,
|
addr_offset: intrin.base(),
|
||||||
mem_space: MemSpace::Global(MemAddrType::A64),
|
mem_space: MemSpace::Global(MemAddrType::A64),
|
||||||
mem_order: MemOrder::Strong(MemScope::GPU),
|
mem_order: MemOrder::Strong(MemScope::GPU),
|
||||||
mem_eviction_priority: MemEvictionPriority::Normal, // Note: no intrinic access
|
mem_eviction_priority: MemEvictionPriority::Normal, // Note: no intrinic access
|
||||||
|
|
@ -3093,12 +3064,11 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
}
|
}
|
||||||
nir_intrinsic_vild_nv => {
|
nir_intrinsic_vild_nv => {
|
||||||
let dst = b.alloc_ssa(RegFile::GPR);
|
let dst = b.alloc_ssa(RegFile::GPR);
|
||||||
|
let idx = self.get_src(&srcs[0]);
|
||||||
let (idx, off) = self.get_io_addr_offset(&srcs[0], 8);
|
|
||||||
b.push_op(OpViLd {
|
b.push_op(OpViLd {
|
||||||
dst: dst.into(),
|
dst: dst.into(),
|
||||||
idx,
|
idx,
|
||||||
off: off.try_into().unwrap(),
|
off: intrin.base().try_into().unwrap(),
|
||||||
});
|
});
|
||||||
self.set_dst(&intrin.def, dst.into());
|
self.set_dst(&intrin.def, dst.into());
|
||||||
}
|
}
|
||||||
|
|
@ -3122,13 +3092,13 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
eviction_priority: self
|
eviction_priority: self
|
||||||
.get_eviction_priority(intrin.access()),
|
.get_eviction_priority(intrin.access()),
|
||||||
};
|
};
|
||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
let addr = self.get_src(&srcs[0]);
|
||||||
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
||||||
|
|
||||||
b.push_op(OpLd {
|
b.push_op(OpLd {
|
||||||
dst: dst.clone().into(),
|
dst: dst.clone().into(),
|
||||||
addr: addr,
|
addr: addr,
|
||||||
offset: offset,
|
offset: intrin.base(),
|
||||||
access: access,
|
access: access,
|
||||||
});
|
});
|
||||||
self.set_dst(&intrin.def, dst);
|
self.set_dst(&intrin.def, dst);
|
||||||
|
|
@ -3231,13 +3201,13 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
order: MemOrder::Strong(MemScope::CTA),
|
order: MemOrder::Strong(MemScope::CTA),
|
||||||
eviction_priority: MemEvictionPriority::Normal,
|
eviction_priority: MemEvictionPriority::Normal,
|
||||||
};
|
};
|
||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
let addr = self.get_src(&srcs[0]);
|
||||||
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
||||||
|
|
||||||
b.push_op(OpLd {
|
b.push_op(OpLd {
|
||||||
dst: dst.clone().into(),
|
dst: dst.clone().into(),
|
||||||
addr: addr,
|
addr: addr,
|
||||||
offset: offset,
|
offset: intrin.base(),
|
||||||
access: access,
|
access: access,
|
||||||
});
|
});
|
||||||
self.set_dst(&intrin.def, dst);
|
self.set_dst(&intrin.def, dst);
|
||||||
|
|
@ -3252,14 +3222,13 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
order: MemOrder::Strong(MemScope::CTA),
|
order: MemOrder::Strong(MemScope::CTA),
|
||||||
eviction_priority: MemEvictionPriority::Normal,
|
eviction_priority: MemEvictionPriority::Normal,
|
||||||
};
|
};
|
||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
let addr = self.get_src(&srcs[0]);
|
||||||
let offset = offset + intrin.base();
|
|
||||||
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
||||||
|
|
||||||
b.push_op(OpLd {
|
b.push_op(OpLd {
|
||||||
dst: dst.clone().into(),
|
dst: dst.clone().into(),
|
||||||
addr: addr,
|
addr: addr,
|
||||||
offset: offset,
|
offset: intrin.base(),
|
||||||
access: access,
|
access: access,
|
||||||
});
|
});
|
||||||
self.set_dst(&intrin.def, dst);
|
self.set_dst(&intrin.def, dst);
|
||||||
|
|
@ -3268,7 +3237,7 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
let size_B = intrin.def.bit_size() / 8;
|
let size_B = intrin.def.bit_size() / 8;
|
||||||
let mem_type = MemType::from_size(size_B, false);
|
let mem_type = MemType::from_size(size_B, false);
|
||||||
|
|
||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
let addr = self.get_src(&srcs[0]);
|
||||||
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
||||||
let locked = b.alloc_ssa(RegFile::Pred);
|
let locked = b.alloc_ssa(RegFile::Pred);
|
||||||
|
|
||||||
|
|
@ -3276,7 +3245,7 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
dst: dst.clone().into(),
|
dst: dst.clone().into(),
|
||||||
locked: locked.into(),
|
locked: locked.into(),
|
||||||
addr,
|
addr,
|
||||||
offset,
|
offset: intrin.base(),
|
||||||
mem_type,
|
mem_type,
|
||||||
});
|
});
|
||||||
let locked_gpr = b.sel(locked.into(), 1.into(), 0.into());
|
let locked_gpr = b.sel(locked.into(), 1.into(), 0.into());
|
||||||
|
|
@ -3319,7 +3288,8 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
(intrin.def.bit_size() / 8) * intrin.def.num_components();
|
(intrin.def.bit_size() / 8) * intrin.def.num_components();
|
||||||
let idx = &srcs[0];
|
let idx = &srcs[0];
|
||||||
|
|
||||||
let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]);
|
let off = self.get_src(&srcs[1]);
|
||||||
|
let off_imm = intrin.base() as u16;
|
||||||
|
|
||||||
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
||||||
|
|
||||||
|
|
@ -3329,7 +3299,7 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
buf: CBuf::Binding(idx_imm),
|
buf: CBuf::Binding(idx_imm),
|
||||||
offset: off_imm,
|
offset: off_imm,
|
||||||
};
|
};
|
||||||
if off.is_zero() {
|
if srcs[1].is_zero() {
|
||||||
for (i, comp) in dst.iter().enumerate() {
|
for (i, comp) in dst.iter().enumerate() {
|
||||||
let i = u16::try_from(i).unwrap();
|
let i = u16::try_from(i).unwrap();
|
||||||
b.copy_to(
|
b.copy_to(
|
||||||
|
|
@ -3376,15 +3346,15 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
(intrin.def.bit_size() / 8) * intrin.def.num_components();
|
(intrin.def.bit_size() / 8) * intrin.def.num_components();
|
||||||
|
|
||||||
let handle = self.get_ssa_ref(&srcs[0]);
|
let handle = self.get_ssa_ref(&srcs[0]);
|
||||||
let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]);
|
let off = self.get_src(&srcs[1]);
|
||||||
|
|
||||||
let cb = CBufRef {
|
let cb = CBufRef {
|
||||||
buf: CBuf::BindlessSSA(handle[..].try_into().unwrap()),
|
buf: CBuf::BindlessSSA(handle[..].try_into().unwrap()),
|
||||||
offset: off_imm,
|
offset: intrin.base() as u16,
|
||||||
};
|
};
|
||||||
|
|
||||||
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
|
||||||
if off.is_zero() {
|
if srcs[1].is_zero() {
|
||||||
for (i, comp) in dst.iter().enumerate() {
|
for (i, comp) in dst.iter().enumerate() {
|
||||||
let i = u16::try_from(i).unwrap();
|
let i = u16::try_from(i).unwrap();
|
||||||
b.copy_to(
|
b.copy_to(
|
||||||
|
|
@ -3565,7 +3535,7 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
}
|
}
|
||||||
nir_intrinsic_shared_atomic_nv => {
|
nir_intrinsic_shared_atomic_nv => {
|
||||||
let bit_size = intrin.def.bit_size();
|
let bit_size = intrin.def.bit_size();
|
||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
let addr = self.get_src(&srcs[0]);
|
||||||
let data = self.get_src(&srcs[1]);
|
let data = self.get_src(&srcs[1]);
|
||||||
let atom_type = self.get_atomic_type(intrin);
|
let atom_type = self.get_atomic_type(intrin);
|
||||||
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
|
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
|
||||||
|
|
@ -3580,7 +3550,7 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
data: data,
|
data: data,
|
||||||
atom_op: atom_op,
|
atom_op: atom_op,
|
||||||
atom_type: atom_type,
|
atom_type: atom_type,
|
||||||
addr_offset: offset,
|
addr_offset: intrin.base(),
|
||||||
mem_space: MemSpace::Shared,
|
mem_space: MemSpace::Shared,
|
||||||
mem_order: MemOrder::Strong(MemScope::CTA),
|
mem_order: MemOrder::Strong(MemScope::CTA),
|
||||||
mem_eviction_priority: MemEvictionPriority::Normal,
|
mem_eviction_priority: MemEvictionPriority::Normal,
|
||||||
|
|
@ -3590,7 +3560,7 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
nir_intrinsic_shared_atomic_swap_nv => {
|
nir_intrinsic_shared_atomic_swap_nv => {
|
||||||
assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
|
assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
|
||||||
let bit_size = intrin.def.bit_size();
|
let bit_size = intrin.def.bit_size();
|
||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
let addr = self.get_src(&srcs[0]);
|
||||||
let cmpr = self.get_src(&srcs[1]);
|
let cmpr = self.get_src(&srcs[1]);
|
||||||
let data = self.get_src(&srcs[2]);
|
let data = self.get_src(&srcs[2]);
|
||||||
let atom_type = AtomType::U(bit_size);
|
let atom_type = AtomType::U(bit_size);
|
||||||
|
|
@ -3605,7 +3575,7 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
data: data,
|
data: data,
|
||||||
atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
|
atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
|
||||||
atom_type: atom_type,
|
atom_type: atom_type,
|
||||||
addr_offset: offset,
|
addr_offset: intrin.base(),
|
||||||
mem_space: MemSpace::Shared,
|
mem_space: MemSpace::Shared,
|
||||||
mem_order: MemOrder::Strong(MemScope::CTA),
|
mem_order: MemOrder::Strong(MemScope::CTA),
|
||||||
mem_eviction_priority: MemEvictionPriority::Normal,
|
mem_eviction_priority: MemEvictionPriority::Normal,
|
||||||
|
|
@ -3628,12 +3598,12 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
eviction_priority: self
|
eviction_priority: self
|
||||||
.get_eviction_priority(intrin.access()),
|
.get_eviction_priority(intrin.access()),
|
||||||
};
|
};
|
||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
|
let addr = self.get_src(&srcs[1]);
|
||||||
|
|
||||||
b.push_op(OpSt {
|
b.push_op(OpSt {
|
||||||
addr: addr,
|
addr: addr,
|
||||||
data: data,
|
data: data,
|
||||||
offset: offset,
|
offset: intrin.base(),
|
||||||
access: access,
|
access: access,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -3658,12 +3628,12 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
order: MemOrder::Strong(MemScope::CTA),
|
order: MemOrder::Strong(MemScope::CTA),
|
||||||
eviction_priority: MemEvictionPriority::Normal,
|
eviction_priority: MemEvictionPriority::Normal,
|
||||||
};
|
};
|
||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
|
let addr = self.get_src(&srcs[1]);
|
||||||
|
|
||||||
b.push_op(OpSt {
|
b.push_op(OpSt {
|
||||||
addr: addr,
|
addr: addr,
|
||||||
data: data,
|
data: data,
|
||||||
offset: offset,
|
offset: intrin.base(),
|
||||||
access: access,
|
access: access,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -3678,13 +3648,12 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
order: MemOrder::Strong(MemScope::CTA),
|
order: MemOrder::Strong(MemScope::CTA),
|
||||||
eviction_priority: MemEvictionPriority::Normal,
|
eviction_priority: MemEvictionPriority::Normal,
|
||||||
};
|
};
|
||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
|
let addr = self.get_src(&srcs[1]);
|
||||||
let offset = offset + intrin.base();
|
|
||||||
|
|
||||||
b.push_op(OpSt {
|
b.push_op(OpSt {
|
||||||
addr: addr,
|
addr: addr,
|
||||||
data: data,
|
data: data,
|
||||||
offset: offset,
|
offset: intrin.base(),
|
||||||
access: access,
|
access: access,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -3694,14 +3663,14 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
(srcs[0].bit_size() / 8) * srcs[0].num_components();
|
(srcs[0].bit_size() / 8) * srcs[0].num_components();
|
||||||
let mem_type = MemType::from_size(size_B, false);
|
let mem_type = MemType::from_size(size_B, false);
|
||||||
|
|
||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
|
let addr = self.get_src(&srcs[1]);
|
||||||
let locked = b.alloc_ssa(RegFile::Pred);
|
let locked = b.alloc_ssa(RegFile::Pred);
|
||||||
|
|
||||||
b.push_op(OpStSCheckUnlock {
|
b.push_op(OpStSCheckUnlock {
|
||||||
locked: locked.into(),
|
locked: locked.into(),
|
||||||
addr,
|
addr,
|
||||||
data,
|
data,
|
||||||
offset,
|
offset: intrin.base(),
|
||||||
mem_type,
|
mem_type,
|
||||||
});
|
});
|
||||||
let locked_gpr = b.sel(locked.into(), 1.into(), 0.into());
|
let locked_gpr = b.sel(locked.into(), 1.into(), 0.into());
|
||||||
|
|
@ -3795,14 +3764,13 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
LdsmSize::M8N8
|
LdsmSize::M8N8
|
||||||
};
|
};
|
||||||
let dst = b.alloc_ssa_vec(RegFile::GPR, comps);
|
let dst = b.alloc_ssa_vec(RegFile::GPR, comps);
|
||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
let addr = self.get_src(&srcs[0]);
|
||||||
let offset = offset + intrin.base();
|
|
||||||
b.push_op(OpLdsm {
|
b.push_op(OpLdsm {
|
||||||
dst: dst.clone().into(),
|
dst: dst.clone().into(),
|
||||||
mat_size,
|
mat_size,
|
||||||
mat_count,
|
mat_count,
|
||||||
addr,
|
addr,
|
||||||
offset,
|
offset: intrin.base(),
|
||||||
});
|
});
|
||||||
self.set_dst(&intrin.def, dst);
|
self.set_dst(&intrin.def, dst);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1058,6 +1058,9 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak)
|
||||||
return progress;
|
return progress;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const static struct nir_opt_offsets_options nak_offset_options = {
|
||||||
|
};
|
||||||
|
|
||||||
void
|
void
|
||||||
nak_postprocess_nir(nir_shader *nir,
|
nak_postprocess_nir(nir_shader *nir,
|
||||||
const struct nak_compiler *nak,
|
const struct nak_compiler *nak,
|
||||||
|
|
@ -1194,6 +1197,7 @@ nak_postprocess_nir(nir_shader *nir,
|
||||||
}
|
}
|
||||||
|
|
||||||
OPT(nir, nak_nir_lower_load_store, nak);
|
OPT(nir, nak_nir_lower_load_store, nak);
|
||||||
|
OPT(nir, nir_opt_offsets, &nak_offset_options);
|
||||||
|
|
||||||
OPT(nir, nir_lower_doubles, NULL, nak->nir_options.lower_doubles_options);
|
OPT(nir, nir_lower_doubles, NULL, nak->nir_options.lower_doubles_options);
|
||||||
OPT(nir, nir_lower_int64);
|
OPT(nir, nir_lower_int64);
|
||||||
|
|
@ -1282,59 +1286,3 @@ nak_postprocess_nir(nir_shader *nir,
|
||||||
nir_print_shader(nir, stderr);
|
nir_print_shader(nir, stderr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
|
||||||
scalar_is_imm_int(nir_scalar x, unsigned bits, bool is_signed)
|
|
||||||
{
|
|
||||||
if (!nir_scalar_is_const(x))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (is_signed) {
|
|
||||||
int64_t imm = nir_scalar_as_int(x);
|
|
||||||
return u_intN_min(bits) <= imm && imm <= u_intN_max(bits);
|
|
||||||
} else {
|
|
||||||
return nir_scalar_as_uint(x) < u_uintN_max(bits);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct nak_io_addr_offset
|
|
||||||
nak_get_io_addr_offset(nir_def *addr, uint8_t imm_bits)
|
|
||||||
{
|
|
||||||
nir_scalar addr_s = {
|
|
||||||
.def = addr,
|
|
||||||
.comp = 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
/* If the entire address is constant, it's an unsigned immediate */
|
|
||||||
if (scalar_is_imm_int(addr_s, imm_bits, false)) {
|
|
||||||
/* Base is a dumb name for this. It should be offset */
|
|
||||||
return (struct nak_io_addr_offset) {
|
|
||||||
.offset = nir_scalar_as_int(addr_s),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
addr_s = nir_scalar_chase_movs(addr_s);
|
|
||||||
if (!nir_scalar_is_alu(addr_s) ||
|
|
||||||
nir_scalar_alu_op(addr_s) != nir_op_iadd) {
|
|
||||||
return (struct nak_io_addr_offset) {
|
|
||||||
.base = addr_s,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < 2; i++) {
|
|
||||||
nir_scalar off_s = nir_scalar_chase_alu_src(addr_s, i);
|
|
||||||
off_s = nir_scalar_chase_movs(off_s);
|
|
||||||
|
|
||||||
/* If it's imm+indirect then the immediate is signed */
|
|
||||||
if (scalar_is_imm_int(off_s, imm_bits, true)) {
|
|
||||||
return (struct nak_io_addr_offset) {
|
|
||||||
.base = nir_scalar_chase_alu_src(addr_s, 1 - i),
|
|
||||||
.offset = nir_scalar_as_int(off_s),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (struct nak_io_addr_offset) {
|
|
||||||
.base = addr_s,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -65,7 +65,8 @@ lower_ldcx_to_global(nir_builder *b, nir_intrinsic_instr *load,
|
||||||
nir_iadd(b, addr, nir_u2u64(b, offset)),
|
nir_iadd(b, addr, nir_u2u64(b, offset)),
|
||||||
.align_mul = nir_intrinsic_align_mul(load),
|
.align_mul = nir_intrinsic_align_mul(load),
|
||||||
.align_offset = nir_intrinsic_align_offset(load),
|
.align_offset = nir_intrinsic_align_offset(load),
|
||||||
.access = ACCESS_CAN_REORDER);
|
.access = ACCESS_CAN_REORDER,
|
||||||
|
.base = nir_intrinsic_base(load));
|
||||||
}
|
}
|
||||||
nir_pop_if(b, NULL);
|
nir_pop_if(b, NULL);
|
||||||
val = nir_if_phi(b, val, zero);
|
val = nir_if_phi(b, val, zero);
|
||||||
|
|
|
||||||
|
|
@ -140,14 +140,6 @@ struct nak_xfb_info
|
||||||
nak_xfb_from_nir(const struct nak_compiler *nak,
|
nak_xfb_from_nir(const struct nak_compiler *nak,
|
||||||
const struct nir_xfb_info *nir_xfb);
|
const struct nir_xfb_info *nir_xfb);
|
||||||
|
|
||||||
struct nak_io_addr_offset {
|
|
||||||
nir_scalar base;
|
|
||||||
int32_t offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct nak_io_addr_offset
|
|
||||||
nak_get_io_addr_offset(nir_def *addr, uint8_t imm_bits);
|
|
||||||
|
|
||||||
enum nak_nir_tex_ref_type {
|
enum nak_nir_tex_ref_type {
|
||||||
/** Indicates that this is a bindless texture */
|
/** Indicates that this is a bindless texture */
|
||||||
NAK_NIR_TEX_REF_TYPE_BINDLESS,
|
NAK_NIR_TEX_REF_TYPE_BINDLESS,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue