mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 20:38:06 +02:00
nak: Return SSAValue from builder where possible
Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34794>
This commit is contained in:
parent
b9e9a811b8
commit
ffe438c77d
3 changed files with 127 additions and 120 deletions
|
|
@ -120,7 +120,7 @@ pub trait SSABuilder: Builder {
|
|||
fn alloc_ssa(&mut self, file: RegFile) -> SSAValue;
|
||||
fn alloc_ssa_vec(&mut self, file: RegFile, comps: u8) -> SSARef;
|
||||
|
||||
fn shl(&mut self, x: Src, shift: Src) -> SSARef {
|
||||
fn shl(&mut self, x: Src, shift: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
if self.sm() >= 70 {
|
||||
self.push_op(OpShf {
|
||||
|
|
@ -141,7 +141,7 @@ pub trait SSABuilder: Builder {
|
|||
wrap: true,
|
||||
});
|
||||
}
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn shl64(&mut self, x: Src, shift: Src) -> SSARef {
|
||||
|
|
@ -188,7 +188,7 @@ pub trait SSABuilder: Builder {
|
|||
dst
|
||||
}
|
||||
|
||||
fn shr(&mut self, x: Src, shift: Src, signed: bool) -> SSARef {
|
||||
fn shr(&mut self, x: Src, shift: Src, signed: bool) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
if self.sm() >= 70 {
|
||||
self.push_op(OpShf {
|
||||
|
|
@ -210,7 +210,7 @@ pub trait SSABuilder: Builder {
|
|||
signed,
|
||||
});
|
||||
}
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn shr64(&mut self, x: Src, shift: Src, signed: bool) -> SSARef {
|
||||
|
|
@ -241,7 +241,7 @@ pub trait SSABuilder: Builder {
|
|||
dst
|
||||
}
|
||||
|
||||
fn fadd(&mut self, x: Src, y: Src) -> SSARef {
|
||||
fn fadd(&mut self, x: Src, y: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
self.push_op(OpFAdd {
|
||||
dst: dst.into(),
|
||||
|
|
@ -250,10 +250,10 @@ pub trait SSABuilder: Builder {
|
|||
rnd_mode: FRndMode::NearestEven,
|
||||
ftz: false,
|
||||
});
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn fmul(&mut self, x: Src, y: Src) -> SSARef {
|
||||
fn fmul(&mut self, x: Src, y: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
self.push_op(OpFMul {
|
||||
dst: dst.into(),
|
||||
|
|
@ -263,10 +263,10 @@ pub trait SSABuilder: Builder {
|
|||
ftz: false,
|
||||
dnz: false,
|
||||
});
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn fset(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSARef {
|
||||
fn fset(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
self.push_op(OpFSet {
|
||||
dst: dst.into(),
|
||||
|
|
@ -274,10 +274,10 @@ pub trait SSABuilder: Builder {
|
|||
srcs: [x, y],
|
||||
ftz: false,
|
||||
});
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn fsetp(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSARef {
|
||||
fn fsetp(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::Pred);
|
||||
self.push_op(OpFSetP {
|
||||
dst: dst.into(),
|
||||
|
|
@ -287,10 +287,10 @@ pub trait SSABuilder: Builder {
|
|||
accum: SrcRef::True.into(),
|
||||
ftz: false,
|
||||
});
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn hadd2(&mut self, x: Src, y: Src) -> SSARef {
|
||||
fn hadd2(&mut self, x: Src, y: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
self.push_op(OpHAdd2 {
|
||||
dst: dst.into(),
|
||||
|
|
@ -299,7 +299,7 @@ pub trait SSABuilder: Builder {
|
|||
ftz: false,
|
||||
f32: false,
|
||||
});
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn hset2(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSARef {
|
||||
|
|
@ -315,7 +315,7 @@ pub trait SSABuilder: Builder {
|
|||
dst.into()
|
||||
}
|
||||
|
||||
fn dsetp(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSARef {
|
||||
fn dsetp(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::Pred);
|
||||
self.push_op(OpDSetP {
|
||||
dst: dst.into(),
|
||||
|
|
@ -324,10 +324,10 @@ pub trait SSABuilder: Builder {
|
|||
srcs: [x, y],
|
||||
accum: SrcRef::True.into(),
|
||||
});
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn iabs(&mut self, i: Src) -> SSARef {
|
||||
fn iabs(&mut self, i: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
if self.sm() >= 70 {
|
||||
self.push_op(OpIAbs {
|
||||
|
|
@ -345,10 +345,10 @@ pub trait SSABuilder: Builder {
|
|||
neg: false,
|
||||
});
|
||||
}
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn iadd(&mut self, x: Src, y: Src, z: Src) -> SSARef {
|
||||
fn iadd(&mut self, x: Src, y: Src, z: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
if self.sm() >= 70 {
|
||||
self.push_op(OpIAdd3 {
|
||||
|
|
@ -364,7 +364,7 @@ pub trait SSABuilder: Builder {
|
|||
carry_out: Dst::None,
|
||||
});
|
||||
}
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn iadd64(&mut self, x: Src, y: Src, z: Src) -> SSARef {
|
||||
|
|
@ -428,7 +428,7 @@ pub trait SSABuilder: Builder {
|
|||
dst
|
||||
}
|
||||
|
||||
fn imnmx(&mut self, tp: IntCmpType, x: Src, y: Src, min: Src) -> SSARef {
|
||||
fn imnmx(&mut self, tp: IntCmpType, x: Src, y: Src, min: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
self.push_op(OpIMnMx {
|
||||
dst: dst.into(),
|
||||
|
|
@ -436,10 +436,10 @@ pub trait SSABuilder: Builder {
|
|||
srcs: [x, y],
|
||||
min: min,
|
||||
});
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn imul(&mut self, x: Src, y: Src) -> SSARef {
|
||||
fn imul(&mut self, x: Src, y: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
if self.sm() >= 70 {
|
||||
self.push_op(OpIMad {
|
||||
|
|
@ -455,7 +455,7 @@ pub trait SSABuilder: Builder {
|
|||
high: false,
|
||||
});
|
||||
}
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn imul_2x32_64(&mut self, x: Src, y: Src, signed: bool) -> SSARef {
|
||||
|
|
@ -483,7 +483,7 @@ pub trait SSABuilder: Builder {
|
|||
dst
|
||||
}
|
||||
|
||||
fn ineg(&mut self, i: Src) -> SSARef {
|
||||
fn ineg(&mut self, i: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
if self.sm() >= 70 {
|
||||
self.push_op(OpIAdd3 {
|
||||
|
|
@ -498,7 +498,7 @@ pub trait SSABuilder: Builder {
|
|||
carry_out: Dst::None,
|
||||
});
|
||||
}
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn ineg64(&mut self, x: Src) -> SSARef {
|
||||
|
|
@ -511,7 +511,7 @@ pub trait SSABuilder: Builder {
|
|||
cmp_op: IntCmpOp,
|
||||
x: Src,
|
||||
y: Src,
|
||||
) -> SSARef {
|
||||
) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::Pred);
|
||||
self.push_op(OpISetP {
|
||||
dst: dst.into(),
|
||||
|
|
@ -523,7 +523,7 @@ pub trait SSABuilder: Builder {
|
|||
accum: true.into(),
|
||||
low_cmp: true.into(),
|
||||
});
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn isetp64(
|
||||
|
|
@ -611,7 +611,7 @@ pub trait SSABuilder: Builder {
|
|||
dst.into()
|
||||
}
|
||||
|
||||
fn lea(&mut self, a: Src, b: Src, shift: u8) -> SSARef {
|
||||
fn lea(&mut self, a: Src, b: Src, shift: u8) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
assert!(self.sm() >= 70);
|
||||
|
||||
|
|
@ -626,7 +626,7 @@ pub trait SSABuilder: Builder {
|
|||
intermediate_mod: SrcMod::None,
|
||||
});
|
||||
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn lea64(&mut self, a: Src, b: Src, shift: u8) -> SSARef {
|
||||
|
|
@ -677,17 +677,17 @@ pub trait SSABuilder: Builder {
|
|||
dst
|
||||
}
|
||||
|
||||
fn lop2(&mut self, op: LogicOp2, x: Src, y: Src) -> SSARef {
|
||||
fn lop2(&mut self, op: LogicOp2, x: Src, y: Src) -> SSAValue {
|
||||
let dst = if x.is_predicate() {
|
||||
self.alloc_ssa(RegFile::Pred)
|
||||
} else {
|
||||
self.alloc_ssa(RegFile::GPR)
|
||||
};
|
||||
self.lop2_to(dst.into(), op, x, y);
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn brev(&mut self, x: Src) -> SSARef {
|
||||
fn brev(&mut self, x: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
if self.sm() >= 70 {
|
||||
self.push_op(OpBRev {
|
||||
|
|
@ -704,20 +704,20 @@ pub trait SSABuilder: Builder {
|
|||
reverse: true,
|
||||
});
|
||||
}
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn mufu(&mut self, op: MuFuOp, src: Src) -> SSARef {
|
||||
fn mufu(&mut self, op: MuFuOp, src: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
self.push_op(OpMuFu {
|
||||
dst: dst.into(),
|
||||
op: op,
|
||||
src: src,
|
||||
});
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn fsin(&mut self, src: Src) -> SSARef {
|
||||
fn fsin(&mut self, src: Src) -> SSAValue {
|
||||
let tmp = if self.sm() >= 70 {
|
||||
let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);
|
||||
self.fmul(src, frac_1_2pi.into())
|
||||
|
|
@ -733,7 +733,7 @@ pub trait SSABuilder: Builder {
|
|||
self.mufu(MuFuOp::Sin, tmp.into())
|
||||
}
|
||||
|
||||
fn fcos(&mut self, src: Src) -> SSARef {
|
||||
fn fcos(&mut self, src: Src) -> SSAValue {
|
||||
let tmp = if self.sm() >= 70 {
|
||||
let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);
|
||||
self.fmul(src, frac_1_2pi.into())
|
||||
|
|
@ -749,7 +749,7 @@ pub trait SSABuilder: Builder {
|
|||
self.mufu(MuFuOp::Cos, tmp.into())
|
||||
}
|
||||
|
||||
fn fexp2(&mut self, src: Src) -> SSARef {
|
||||
fn fexp2(&mut self, src: Src) -> SSAValue {
|
||||
let tmp = if self.sm() >= 70 {
|
||||
src
|
||||
} else {
|
||||
|
|
@ -764,13 +764,13 @@ pub trait SSABuilder: Builder {
|
|||
self.mufu(MuFuOp::Exp2, tmp)
|
||||
}
|
||||
|
||||
fn prmt(&mut self, x: Src, y: Src, sel: [u8; 4]) -> SSARef {
|
||||
fn prmt(&mut self, x: Src, y: Src, sel: [u8; 4]) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
self.prmt_to(dst.into(), x, y, sel);
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn prmt4(&mut self, src: [Src; 4], sel: [u8; 4]) -> SSARef {
|
||||
fn prmt4(&mut self, src: [Src; 4], sel: [u8; 4]) -> SSAValue {
|
||||
let max_sel = *sel.iter().max().unwrap();
|
||||
if max_sel < 8 {
|
||||
self.prmt(src[0], src[1], sel)
|
||||
|
|
@ -808,7 +808,7 @@ pub trait SSABuilder: Builder {
|
|||
}
|
||||
}
|
||||
|
||||
fn sel(&mut self, cond: Src, x: Src, y: Src) -> SSARef {
|
||||
fn sel(&mut self, cond: Src, x: Src, y: Src) -> SSAValue {
|
||||
assert!(cond.src_ref.is_predicate());
|
||||
assert!(x.is_predicate() == y.is_predicate());
|
||||
if x.is_predicate() {
|
||||
|
|
@ -835,7 +835,7 @@ pub trait SSABuilder: Builder {
|
|||
srcs: [cond.bnot(), y, tmp.into()],
|
||||
});
|
||||
}
|
||||
dst.into()
|
||||
dst
|
||||
} else {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
self.push_op(OpSel {
|
||||
|
|
@ -843,17 +843,17 @@ pub trait SSABuilder: Builder {
|
|||
cond: cond,
|
||||
srcs: [x, y],
|
||||
});
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
}
|
||||
|
||||
fn undef(&mut self) -> SSARef {
|
||||
fn undef(&mut self) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
self.push_op(OpUndef { dst: dst.into() });
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn copy(&mut self, src: Src) -> SSARef {
|
||||
fn copy(&mut self, src: Src) -> SSAValue {
|
||||
let dst = if src.is_predicate() {
|
||||
self.alloc_ssa(RegFile::Pred)
|
||||
} else {
|
||||
|
|
@ -863,7 +863,7 @@ pub trait SSABuilder: Builder {
|
|||
dst.into()
|
||||
}
|
||||
|
||||
fn bmov_to_bar(&mut self, src: Src) -> SSARef {
|
||||
fn bmov_to_bar(&mut self, src: Src) -> SSAValue {
|
||||
assert!(src.src_ref.as_ssa().unwrap().file() == Some(RegFile::GPR));
|
||||
let dst = self.alloc_ssa(RegFile::Bar);
|
||||
self.push_op(OpBMov {
|
||||
|
|
@ -871,10 +871,10 @@ pub trait SSABuilder: Builder {
|
|||
src: src,
|
||||
clear: false,
|
||||
});
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
|
||||
fn bmov_to_gpr(&mut self, src: Src) -> SSARef {
|
||||
fn bmov_to_gpr(&mut self, src: Src) -> SSAValue {
|
||||
assert!(src.src_ref.as_ssa().unwrap().file() == Some(RegFile::Bar));
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
self.push_op(OpBMov {
|
||||
|
|
@ -882,7 +882,7 @@ pub trait SSABuilder: Builder {
|
|||
src: src,
|
||||
clear: false,
|
||||
});
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -563,7 +563,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
// we'll need a copy in that case. If the copy
|
||||
// isn't needed, copy-prop should clean it up for
|
||||
// us.
|
||||
comps.push(b.copy(ssa.into())[0]);
|
||||
comps.push(b.copy(ssa.into()));
|
||||
}
|
||||
}
|
||||
8 => {
|
||||
|
|
@ -603,7 +603,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
res
|
||||
};
|
||||
|
||||
comps.push(b.prmt4(psrc, psel)[0]);
|
||||
comps.push(b.prmt4(psrc, psel));
|
||||
}
|
||||
}
|
||||
16 => {
|
||||
|
|
@ -620,7 +620,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
psel[w * 2 + 1] = (w as u8 * 4) + byte + 1;
|
||||
}
|
||||
}
|
||||
comps.push(b.prmt(psrc[0], psrc[1], psel)[0]);
|
||||
comps.push(b.prmt(psrc[0], psrc[1], psel));
|
||||
}
|
||||
}
|
||||
_ => panic!("Unknown bit size: {src_bit_size}"),
|
||||
|
|
@ -706,25 +706,28 @@ impl<'a> ShaderFromNir<'a> {
|
|||
nir_op_b2b1 => {
|
||||
assert!(alu.get_src(0).bit_size() == 32);
|
||||
b.isetp(IntCmpType::I32, IntCmpOp::Ne, srcs[0], 0.into())
|
||||
.into()
|
||||
}
|
||||
nir_op_b2b32 | nir_op_b2i8 | nir_op_b2i16 | nir_op_b2i32 => {
|
||||
b.sel(srcs[0].bnot(), 0.into(), 1.into())
|
||||
b.sel(srcs[0].bnot(), 0.into(), 1.into()).into()
|
||||
}
|
||||
nir_op_b2i64 => {
|
||||
let lo = b.sel(srcs[0].bnot(), 0.into(), 1.into());
|
||||
let hi = b.copy(0.into());
|
||||
[lo[0], hi[0]].into()
|
||||
[lo, hi].into()
|
||||
}
|
||||
nir_op_b2f16 => {
|
||||
b.sel(srcs[0].bnot(), 0.into(), 0x3c00.into()).into()
|
||||
}
|
||||
nir_op_b2f16 => b.sel(srcs[0].bnot(), 0.into(), 0x3c00.into()),
|
||||
nir_op_b2f32 => {
|
||||
b.sel(srcs[0].bnot(), 0.0_f32.into(), 1.0_f32.into())
|
||||
b.sel(srcs[0].bnot(), 0.0_f32.into(), 1.0_f32.into()).into()
|
||||
}
|
||||
nir_op_b2f64 => {
|
||||
let lo = b.copy(0.into());
|
||||
let hi = b.sel(srcs[0].bnot(), 0.into(), 0x3ff00000.into());
|
||||
[lo[0], hi[0]].into()
|
||||
[lo, hi].into()
|
||||
}
|
||||
nir_op_bcsel => b.sel(srcs[0], srcs[1], srcs[2]),
|
||||
nir_op_bcsel => b.sel(srcs[0], srcs[1], srcs[2]).into(),
|
||||
nir_op_bfm => {
|
||||
let dst = b.alloc_ssa(RegFile::GPR);
|
||||
b.push_op(OpBMsk {
|
||||
|
|
@ -743,7 +746,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
});
|
||||
dst.into()
|
||||
}
|
||||
nir_op_bitfield_reverse => b.brev(srcs[0]),
|
||||
nir_op_bitfield_reverse => b.brev(srcs[0]).into(),
|
||||
nir_op_ibitfield_extract | nir_op_ubitfield_extract => {
|
||||
let range = b.alloc_ssa(RegFile::GPR);
|
||||
b.push_op(OpPrmt {
|
||||
|
|
@ -796,6 +799,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
_ => panic!("Unknown extract op: {}", alu.op),
|
||||
}
|
||||
.into()
|
||||
}
|
||||
nir_op_f2f16 | nir_op_f2f16_rtne | nir_op_f2f16_rtz
|
||||
| nir_op_f2f32 | nir_op_f2f64 => {
|
||||
|
|
@ -960,7 +964,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
dst.into()
|
||||
}
|
||||
nir_op_fcos => b.fcos(srcs[0]),
|
||||
nir_op_fcos => b.fcos(srcs[0]).into(),
|
||||
nir_op_feq | nir_op_fge | nir_op_flt | nir_op_fneu => {
|
||||
let src_type =
|
||||
FloatType::from_bits(alu.get_src(0).bit_size().into());
|
||||
|
|
@ -1025,7 +1029,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
dst
|
||||
}
|
||||
nir_op_fexp2 => b.fexp2(srcs[0]),
|
||||
nir_op_fexp2 => b.fexp2(srcs[0]).into(),
|
||||
nir_op_ffma => {
|
||||
let ftype = FloatType::from_bits(alu.def.bit_size().into());
|
||||
let dst;
|
||||
|
|
@ -1091,7 +1095,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
nir_op_flog2 => {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
b.mufu(MuFuOp::Log2, srcs[0])
|
||||
b.mufu(MuFuOp::Log2, srcs[0]).into()
|
||||
}
|
||||
nir_op_fmax | nir_op_fmin => {
|
||||
let dst;
|
||||
|
|
@ -1222,22 +1226,23 @@ impl<'a> ShaderFromNir<'a> {
|
|||
b.lop2(LogicOp2::And, srcs[0], 0x80000000.into());
|
||||
b.sel(denorm.into(), zero.into(), dst.into())
|
||||
} else {
|
||||
dst.into()
|
||||
dst
|
||||
}
|
||||
.into()
|
||||
}
|
||||
nir_op_frcp => {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
b.mufu(MuFuOp::Rcp, srcs[0])
|
||||
b.mufu(MuFuOp::Rcp, srcs[0]).into()
|
||||
}
|
||||
nir_op_frsq => {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
b.mufu(MuFuOp::Rsq, srcs[0])
|
||||
b.mufu(MuFuOp::Rsq, srcs[0]).into()
|
||||
}
|
||||
nir_op_fsat => {
|
||||
let ftype = FloatType::from_bits(alu.def.bit_size().into());
|
||||
|
||||
if self.alu_src_is_saturated(&alu.srcs_as_slice()[0]) {
|
||||
b.copy(srcs[0])
|
||||
b.copy(srcs[0]).into()
|
||||
} else if alu.def.bit_size() == 32 {
|
||||
let dst = b.alloc_ssa(RegFile::GPR);
|
||||
b.push_op(OpFAdd {
|
||||
|
|
@ -1273,11 +1278,11 @@ impl<'a> ShaderFromNir<'a> {
|
|||
let hi = b.sel(lz.into(), 0xbff00000.into(), 0.into());
|
||||
let hi = b.sel(gz.into(), 0x3ff00000.into(), hi.into());
|
||||
let lo = b.copy(0.into());
|
||||
[lo[0], hi[0]].into()
|
||||
[lo, hi].into()
|
||||
} else if alu.def.bit_size() == 32 {
|
||||
let lz = b.fset(FloatCmpOp::OrdLt, srcs[0], 0.into());
|
||||
let gz = b.fset(FloatCmpOp::OrdGt, srcs[0], 0.into());
|
||||
b.fadd(gz.into(), Src::from(lz).fneg())
|
||||
b.fadd(gz.into(), Src::from(lz).fneg()).into()
|
||||
} else if alu.def.bit_size() == 16 {
|
||||
let x = restrict_f16v2_src(srcs[0]);
|
||||
|
||||
|
|
@ -1288,13 +1293,13 @@ impl<'a> ShaderFromNir<'a> {
|
|||
b.hset2(FloatCmpOp::OrdGt, x, 0.into()).into(),
|
||||
);
|
||||
|
||||
b.hadd2(gz, lz.fneg())
|
||||
b.hadd2(gz, lz.fneg()).into()
|
||||
} else {
|
||||
panic!("Unsupported float type: f{}", alu.def.bit_size());
|
||||
}
|
||||
}
|
||||
nir_op_fsin => b.fsin(srcs[0]),
|
||||
nir_op_fsqrt => b.mufu(MuFuOp::Sqrt, srcs[0]),
|
||||
nir_op_fsin => b.fsin(srcs[0]).into(),
|
||||
nir_op_fsqrt => b.mufu(MuFuOp::Sqrt, srcs[0]).into(),
|
||||
nir_op_i2f16 | nir_op_i2f32 | nir_op_i2f64 => {
|
||||
let src_bits = alu.get_src(0).src.bit_size();
|
||||
let dst_bits = alu.def.bit_size();
|
||||
|
|
@ -1347,38 +1352,39 @@ impl<'a> ShaderFromNir<'a> {
|
|||
if dst_bits == 64 {
|
||||
*src
|
||||
} else {
|
||||
b.prmt(src[0].into(), src[1].into(), prmt_lo)
|
||||
b.prmt(src[0].into(), src[1].into(), prmt_lo).into()
|
||||
}
|
||||
} else {
|
||||
if dst_bits == 64 {
|
||||
let lo = b.prmt(src[0].into(), 0.into(), prmt_lo);
|
||||
let hi = b.prmt(src[0].into(), 0.into(), prmt_hi);
|
||||
[lo[0], hi[0]].into()
|
||||
[lo, hi].into()
|
||||
} else {
|
||||
b.prmt(src[0].into(), 0.into(), prmt_lo)
|
||||
b.prmt(src[0].into(), 0.into(), prmt_lo).into()
|
||||
}
|
||||
}
|
||||
}
|
||||
nir_op_iabs => b.iabs(srcs[0]),
|
||||
nir_op_iabs => b.iabs(srcs[0]).into(),
|
||||
nir_op_iadd => match alu.def.bit_size {
|
||||
32 => b.iadd(srcs[0], srcs[1], 0.into()),
|
||||
32 => b.iadd(srcs[0], srcs[1], 0.into()).into(),
|
||||
64 => b.iadd64(srcs[0], srcs[1], 0.into()),
|
||||
x => panic!("unsupported bit size for nir_op_iadd: {x}"),
|
||||
},
|
||||
nir_op_iadd3 => match alu.def.bit_size {
|
||||
32 => b.iadd(srcs[0], srcs[1], srcs[2]),
|
||||
32 => b.iadd(srcs[0], srcs[1], srcs[2]).into(),
|
||||
64 => b.iadd64(srcs[0], srcs[1], srcs[2]),
|
||||
x => panic!("unsupported bit size for nir_op_iadd3: {x}"),
|
||||
},
|
||||
nir_op_iand => b.lop2(LogicOp2::And, srcs[0], srcs[1]),
|
||||
nir_op_iand => b.lop2(LogicOp2::And, srcs[0], srcs[1]).into(),
|
||||
nir_op_ieq => {
|
||||
if alu.get_src(0).bit_size() == 1 {
|
||||
b.lop2(LogicOp2::Xor, srcs[0], srcs[1].bnot())
|
||||
b.lop2(LogicOp2::Xor, srcs[0], srcs[1].bnot()).into()
|
||||
} else if alu.get_src(0).bit_size() == 64 {
|
||||
b.isetp64(IntCmpType::I32, IntCmpOp::Eq, srcs[0], srcs[1])
|
||||
} else {
|
||||
assert!(alu.get_src(0).bit_size() == 32);
|
||||
b.isetp(IntCmpType::I32, IntCmpOp::Eq, srcs[0], srcs[1])
|
||||
.into()
|
||||
}
|
||||
}
|
||||
nir_op_ifind_msb | nir_op_ifind_msb_rev | nir_op_ufind_msb
|
||||
|
|
@ -1414,7 +1420,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
b.isetp64(cmp_type, cmp_op, x.into(), y.into())
|
||||
} else {
|
||||
assert!(alu.get_src(0).bit_size() == 32);
|
||||
b.isetp(cmp_type, cmp_op, x.into(), y.into())
|
||||
b.isetp(cmp_type, cmp_op, x.into(), y.into()).into()
|
||||
}
|
||||
}
|
||||
nir_op_imad => {
|
||||
|
|
@ -1436,11 +1442,11 @@ impl<'a> ShaderFromNir<'a> {
|
|||
_ => panic!("Not an integer min/max"),
|
||||
};
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
b.imnmx(tp, srcs[0], srcs[1], min.into())
|
||||
b.imnmx(tp, srcs[0], srcs[1], min.into()).into()
|
||||
}
|
||||
nir_op_imul => {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
b.imul(srcs[0], srcs[1])
|
||||
b.imul(srcs[0], srcs[1]).into()
|
||||
}
|
||||
nir_op_imul_2x32_64 | nir_op_umul_2x32_64 => {
|
||||
let signed = alu.op == nir_op_imul_2x32_64;
|
||||
|
|
@ -1453,12 +1459,13 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
nir_op_ine => {
|
||||
if alu.get_src(0).bit_size() == 1 {
|
||||
b.lop2(LogicOp2::Xor, srcs[0], srcs[1])
|
||||
b.lop2(LogicOp2::Xor, srcs[0], srcs[1]).into()
|
||||
} else if alu.get_src(0).bit_size() == 64 {
|
||||
b.isetp64(IntCmpType::I32, IntCmpOp::Ne, srcs[0], srcs[1])
|
||||
} else {
|
||||
assert!(alu.get_src(0).bit_size() == 32);
|
||||
b.isetp(IntCmpType::I32, IntCmpOp::Ne, srcs[0], srcs[1])
|
||||
.into()
|
||||
}
|
||||
}
|
||||
nir_op_ineg => {
|
||||
|
|
@ -1466,18 +1473,18 @@ impl<'a> ShaderFromNir<'a> {
|
|||
b.ineg64(srcs[0])
|
||||
} else {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
b.ineg(srcs[0])
|
||||
b.ineg(srcs[0]).into()
|
||||
}
|
||||
}
|
||||
nir_op_inot => {
|
||||
if alu.def.bit_size() == 1 {
|
||||
b.lop2(LogicOp2::PassB, true.into(), srcs[0].bnot())
|
||||
b.lop2(LogicOp2::PassB, true.into(), srcs[0].bnot()).into()
|
||||
} else {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
b.lop2(LogicOp2::PassB, 0.into(), srcs[0].bnot())
|
||||
b.lop2(LogicOp2::PassB, 0.into(), srcs[0].bnot()).into()
|
||||
}
|
||||
}
|
||||
nir_op_ior => b.lop2(LogicOp2::Or, srcs[0], srcs[1]),
|
||||
nir_op_ior => b.lop2(LogicOp2::Or, srcs[0], srcs[1]).into(),
|
||||
nir_op_ishl => {
|
||||
if alu.def.bit_size() == 64 {
|
||||
let shift = if let Some(s) = nir_srcs[1].comp_as_uint(0) {
|
||||
|
|
@ -1488,7 +1495,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
b.shl64(srcs[0], shift)
|
||||
} else {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
b.shl(srcs[0], srcs[1])
|
||||
b.shl(srcs[0], srcs[1]).into()
|
||||
}
|
||||
}
|
||||
nir_op_ishr => {
|
||||
|
|
@ -1501,7 +1508,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
b.shr64(srcs[0], shift, true)
|
||||
} else {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
b.shr(srcs[0], srcs[1], true)
|
||||
b.shr(srcs[0], srcs[1], true).into()
|
||||
}
|
||||
}
|
||||
nir_op_lea_nv => {
|
||||
|
|
@ -1509,17 +1516,17 @@ impl<'a> ShaderFromNir<'a> {
|
|||
let src_b = srcs[0];
|
||||
let shift = nir_srcs[2].comp_as_uint(0).unwrap() as u8;
|
||||
match alu.def.bit_size {
|
||||
32 => b.lea(src_a, src_b, shift),
|
||||
32 => b.lea(src_a, src_b, shift).into(),
|
||||
64 => b.lea64(src_a, src_b, shift),
|
||||
x => panic!("unsupported bit size for nir_op_lea_nv: {x}"),
|
||||
}
|
||||
}
|
||||
nir_op_isub => match alu.def.bit_size {
|
||||
32 => b.iadd(srcs[0], srcs[1].ineg(), 0.into()),
|
||||
32 => b.iadd(srcs[0], srcs[1].ineg(), 0.into()).into(),
|
||||
64 => b.iadd64(srcs[0], srcs[1].ineg(), 0.into()),
|
||||
x => panic!("unsupported bit size for nir_op_iadd: {x}"),
|
||||
},
|
||||
nir_op_ixor => b.lop2(LogicOp2::Xor, srcs[0], srcs[1]),
|
||||
nir_op_ixor => b.lop2(LogicOp2::Xor, srcs[0], srcs[1]).into(),
|
||||
nir_op_pack_half_2x16_split | nir_op_pack_half_2x16_rtz_split => {
|
||||
assert!(alu.get_src(0).bit_size() == 32);
|
||||
|
||||
|
|
@ -1567,7 +1574,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
integer_rnd: false,
|
||||
});
|
||||
|
||||
b.prmt(low.into(), high.into(), [0, 1, 4, 5])
|
||||
b.prmt(low.into(), high.into(), [0, 1, 4, 5]).into()
|
||||
}
|
||||
}
|
||||
nir_op_prmt_nv => {
|
||||
|
|
@ -1644,10 +1651,10 @@ impl<'a> ShaderFromNir<'a> {
|
|||
b.sel(ovf_hi.into(), u32::MAX.into(), sum_lo.into());
|
||||
let hi =
|
||||
b.sel(ovf_hi.into(), u32::MAX.into(), sum_hi.into());
|
||||
[lo[0], hi[0]].into()
|
||||
[lo, hi].into()
|
||||
} else {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
b.sel(ovf_lo.into(), u32::MAX.into(), sum_lo.into())
|
||||
b.sel(ovf_lo.into(), u32::MAX.into(), sum_lo.into()).into()
|
||||
}
|
||||
}
|
||||
nir_op_usub_sat => {
|
||||
|
|
@ -1677,25 +1684,25 @@ impl<'a> ShaderFromNir<'a> {
|
|||
});
|
||||
let lo = b.sel(ovf_hi.into(), sum_lo.into(), 0.into());
|
||||
let hi = b.sel(ovf_hi.into(), sum_hi.into(), 0.into());
|
||||
[lo[0], hi[0]].into()
|
||||
[lo, hi].into()
|
||||
} else {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
b.sel(ovf_lo.into(), sum_lo.into(), 0.into())
|
||||
b.sel(ovf_lo.into(), sum_lo.into(), 0.into()).into()
|
||||
}
|
||||
}
|
||||
nir_op_unpack_32_2x16_split_x => {
|
||||
b.prmt(srcs[0], 0.into(), [0, 1, 4, 4])
|
||||
b.prmt(srcs[0], 0.into(), [0, 1, 4, 4]).into()
|
||||
}
|
||||
nir_op_unpack_32_2x16_split_y => {
|
||||
b.prmt(srcs[0], 0.into(), [2, 3, 4, 4])
|
||||
b.prmt(srcs[0], 0.into(), [2, 3, 4, 4]).into()
|
||||
}
|
||||
nir_op_unpack_64_2x32_split_x => {
|
||||
let src0_x = srcs[0].as_ssa().unwrap()[0];
|
||||
b.copy(src0_x.into())
|
||||
b.copy(src0_x.into()).into()
|
||||
}
|
||||
nir_op_unpack_64_2x32_split_y => {
|
||||
let src0_y = srcs[0].as_ssa().unwrap()[1];
|
||||
b.copy(src0_y.into())
|
||||
b.copy(src0_y.into()).into()
|
||||
}
|
||||
nir_op_unpack_half_2x16_split_x
|
||||
| nir_op_unpack_half_2x16_split_y => {
|
||||
|
|
@ -1725,7 +1732,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
b.shr64(srcs[0], shift, false)
|
||||
} else {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
b.shr(srcs[0], srcs[1], false)
|
||||
b.shr(srcs[0], srcs[1], false).into()
|
||||
}
|
||||
}
|
||||
_ => panic!("Unsupported ALU instruction: {}", alu.info().name()),
|
||||
|
|
@ -1952,9 +1959,9 @@ impl<'a> ShaderFromNir<'a> {
|
|||
let Dst::SSA(fault) = fault else {
|
||||
panic!("No fault value for sparse op");
|
||||
};
|
||||
nir_dst.push(b.sel(fault.into(), 0.into(), 1.into())[0]);
|
||||
nir_dst.push(b.sel(fault.into(), 0.into(), 1.into()));
|
||||
} else if mask & (1 << i) == 0 {
|
||||
nir_dst.push(b.copy(0.into())[0]);
|
||||
nir_dst.push(b.copy(0.into()));
|
||||
} else {
|
||||
nir_dst.push(dst[di]);
|
||||
di += 1;
|
||||
|
|
@ -2417,7 +2424,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
cond: cond.into(),
|
||||
});
|
||||
|
||||
self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()));
|
||||
self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()).into());
|
||||
}
|
||||
nir_intrinsic_bar_set_nv => {
|
||||
let label = self.label_alloc.alloc();
|
||||
|
|
@ -2437,7 +2444,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
target: label,
|
||||
});
|
||||
|
||||
self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()));
|
||||
self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()).into());
|
||||
}
|
||||
nir_intrinsic_bar_sync_nv => {
|
||||
let src = self.get_src(&srcs[0]);
|
||||
|
|
@ -2601,7 +2608,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
for i in 0..usize::from(comps) - 1 {
|
||||
final_dst.push(dst[i]);
|
||||
}
|
||||
final_dst.push(b.sel(fault.into(), 0.into(), 1.into())[0]);
|
||||
final_dst.push(b.sel(fault.into(), 0.into(), 1.into()));
|
||||
|
||||
self.set_ssa(&intrin.def, final_dst);
|
||||
}
|
||||
|
|
@ -3408,7 +3415,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
1 => {
|
||||
for c in 0..load_const.def.num_components {
|
||||
let imm_b1 = unsafe { values[usize::from(c)].b };
|
||||
dst.push(b.copy(imm_b1.into())[0]);
|
||||
dst.push(b.copy(imm_b1.into()));
|
||||
}
|
||||
}
|
||||
8 => {
|
||||
|
|
@ -3421,7 +3428,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
imm_u32 |= u32::from(imm_u8) << b * 8;
|
||||
}
|
||||
}
|
||||
dst.push(b.copy(imm_u32.into())[0]);
|
||||
dst.push(b.copy(imm_u32.into()));
|
||||
}
|
||||
}
|
||||
16 => {
|
||||
|
|
@ -3435,20 +3442,20 @@ impl<'a> ShaderFromNir<'a> {
|
|||
imm_u32 |= u32::from(imm_u16) << w * 16;
|
||||
}
|
||||
}
|
||||
dst.push(b.copy(imm_u32.into())[0]);
|
||||
dst.push(b.copy(imm_u32.into()));
|
||||
}
|
||||
}
|
||||
32 => {
|
||||
for c in 0..load_const.def.num_components {
|
||||
let imm_u32 = unsafe { values[usize::from(c)].u32_ };
|
||||
dst.push(b.copy(imm_u32.into())[0]);
|
||||
dst.push(b.copy(imm_u32.into()));
|
||||
}
|
||||
}
|
||||
64 => {
|
||||
for c in 0..load_const.def.num_components {
|
||||
let imm_u64 = unsafe { values[c as usize].u64_ };
|
||||
dst.push(b.copy((imm_u64 as u32).into())[0]);
|
||||
dst.push(b.copy(((imm_u64 >> 32) as u32).into())[0]);
|
||||
dst.push(b.copy((imm_u64 as u32).into()));
|
||||
dst.push(b.copy(((imm_u64 >> 32) as u32).into()));
|
||||
}
|
||||
}
|
||||
_ => panic!("Unknown bit size: {}", load_const.def.bit_size),
|
||||
|
|
|
|||
|
|
@ -109,8 +109,8 @@ impl<'a> TestShaderBuilder<'a> {
|
|||
};
|
||||
|
||||
let data_offset = SSARef::from([
|
||||
b.imul(invoc_id.into(), data_stride.into())[0],
|
||||
b.copy(0.into())[0],
|
||||
b.imul(invoc_id.into(), data_stride.into()),
|
||||
b.copy(0.into()),
|
||||
]);
|
||||
let data_addr =
|
||||
b.iadd64(data_addr.into(), data_offset.into(), 0.into());
|
||||
|
|
@ -122,7 +122,7 @@ impl<'a> TestShaderBuilder<'a> {
|
|||
invoc_id.into(),
|
||||
invocations.into(),
|
||||
);
|
||||
b.predicate(oob[0].into()).push_op(OpExit {});
|
||||
b.predicate(oob.into()).push_op(OpExit {});
|
||||
|
||||
let start_block = BasicBlock {
|
||||
label: label_alloc.alloc(),
|
||||
|
|
@ -411,7 +411,7 @@ pub fn test_foldable_op_with(
|
|||
}
|
||||
file => panic!("Can't auto-test {file:?} data"),
|
||||
};
|
||||
b.st_test_data(comps * 4, MemType::B32, u);
|
||||
b.st_test_data(comps * 4, MemType::B32, u.into());
|
||||
comps += 1;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue