nak: Return SSAValue from builder where possible

Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34794>
This commit is contained in:
Authored by Mel Henning on 2025-04-30 15:54:59 -04:00; committed by Marge Bot
parent b9e9a811b8
commit ffe438c77d
3 changed files with 127 additions and 120 deletions

View file

@ -120,7 +120,7 @@ pub trait SSABuilder: Builder {
fn alloc_ssa(&mut self, file: RegFile) -> SSAValue;
fn alloc_ssa_vec(&mut self, file: RegFile, comps: u8) -> SSARef;
fn shl(&mut self, x: Src, shift: Src) -> SSARef {
fn shl(&mut self, x: Src, shift: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
if self.sm() >= 70 {
self.push_op(OpShf {
@ -141,7 +141,7 @@ pub trait SSABuilder: Builder {
wrap: true,
});
}
dst.into()
dst
}
fn shl64(&mut self, x: Src, shift: Src) -> SSARef {
@ -188,7 +188,7 @@ pub trait SSABuilder: Builder {
dst
}
fn shr(&mut self, x: Src, shift: Src, signed: bool) -> SSARef {
fn shr(&mut self, x: Src, shift: Src, signed: bool) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
if self.sm() >= 70 {
self.push_op(OpShf {
@ -210,7 +210,7 @@ pub trait SSABuilder: Builder {
signed,
});
}
dst.into()
dst
}
fn shr64(&mut self, x: Src, shift: Src, signed: bool) -> SSARef {
@ -241,7 +241,7 @@ pub trait SSABuilder: Builder {
dst
}
fn fadd(&mut self, x: Src, y: Src) -> SSARef {
fn fadd(&mut self, x: Src, y: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
self.push_op(OpFAdd {
dst: dst.into(),
@ -250,10 +250,10 @@ pub trait SSABuilder: Builder {
rnd_mode: FRndMode::NearestEven,
ftz: false,
});
dst.into()
dst
}
fn fmul(&mut self, x: Src, y: Src) -> SSARef {
fn fmul(&mut self, x: Src, y: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
self.push_op(OpFMul {
dst: dst.into(),
@ -263,10 +263,10 @@ pub trait SSABuilder: Builder {
ftz: false,
dnz: false,
});
dst.into()
dst
}
fn fset(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSARef {
fn fset(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
self.push_op(OpFSet {
dst: dst.into(),
@ -274,10 +274,10 @@ pub trait SSABuilder: Builder {
srcs: [x, y],
ftz: false,
});
dst.into()
dst
}
fn fsetp(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSARef {
fn fsetp(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::Pred);
self.push_op(OpFSetP {
dst: dst.into(),
@ -287,10 +287,10 @@ pub trait SSABuilder: Builder {
accum: SrcRef::True.into(),
ftz: false,
});
dst.into()
dst
}
fn hadd2(&mut self, x: Src, y: Src) -> SSARef {
fn hadd2(&mut self, x: Src, y: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
self.push_op(OpHAdd2 {
dst: dst.into(),
@ -299,7 +299,7 @@ pub trait SSABuilder: Builder {
ftz: false,
f32: false,
});
dst.into()
dst
}
fn hset2(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSARef {
@ -315,7 +315,7 @@ pub trait SSABuilder: Builder {
dst.into()
}
fn dsetp(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSARef {
fn dsetp(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::Pred);
self.push_op(OpDSetP {
dst: dst.into(),
@ -324,10 +324,10 @@ pub trait SSABuilder: Builder {
srcs: [x, y],
accum: SrcRef::True.into(),
});
dst.into()
dst
}
fn iabs(&mut self, i: Src) -> SSARef {
fn iabs(&mut self, i: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
if self.sm() >= 70 {
self.push_op(OpIAbs {
@ -345,10 +345,10 @@ pub trait SSABuilder: Builder {
neg: false,
});
}
dst.into()
dst
}
fn iadd(&mut self, x: Src, y: Src, z: Src) -> SSARef {
fn iadd(&mut self, x: Src, y: Src, z: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
if self.sm() >= 70 {
self.push_op(OpIAdd3 {
@ -364,7 +364,7 @@ pub trait SSABuilder: Builder {
carry_out: Dst::None,
});
}
dst.into()
dst
}
fn iadd64(&mut self, x: Src, y: Src, z: Src) -> SSARef {
@ -428,7 +428,7 @@ pub trait SSABuilder: Builder {
dst
}
fn imnmx(&mut self, tp: IntCmpType, x: Src, y: Src, min: Src) -> SSARef {
fn imnmx(&mut self, tp: IntCmpType, x: Src, y: Src, min: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
self.push_op(OpIMnMx {
dst: dst.into(),
@ -436,10 +436,10 @@ pub trait SSABuilder: Builder {
srcs: [x, y],
min: min,
});
dst.into()
dst
}
fn imul(&mut self, x: Src, y: Src) -> SSARef {
fn imul(&mut self, x: Src, y: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
if self.sm() >= 70 {
self.push_op(OpIMad {
@ -455,7 +455,7 @@ pub trait SSABuilder: Builder {
high: false,
});
}
dst.into()
dst
}
fn imul_2x32_64(&mut self, x: Src, y: Src, signed: bool) -> SSARef {
@ -483,7 +483,7 @@ pub trait SSABuilder: Builder {
dst
}
fn ineg(&mut self, i: Src) -> SSARef {
fn ineg(&mut self, i: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
if self.sm() >= 70 {
self.push_op(OpIAdd3 {
@ -498,7 +498,7 @@ pub trait SSABuilder: Builder {
carry_out: Dst::None,
});
}
dst.into()
dst
}
fn ineg64(&mut self, x: Src) -> SSARef {
@ -511,7 +511,7 @@ pub trait SSABuilder: Builder {
cmp_op: IntCmpOp,
x: Src,
y: Src,
) -> SSARef {
) -> SSAValue {
let dst = self.alloc_ssa(RegFile::Pred);
self.push_op(OpISetP {
dst: dst.into(),
@ -523,7 +523,7 @@ pub trait SSABuilder: Builder {
accum: true.into(),
low_cmp: true.into(),
});
dst.into()
dst
}
fn isetp64(
@ -611,7 +611,7 @@ pub trait SSABuilder: Builder {
dst.into()
}
fn lea(&mut self, a: Src, b: Src, shift: u8) -> SSARef {
fn lea(&mut self, a: Src, b: Src, shift: u8) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
assert!(self.sm() >= 70);
@ -626,7 +626,7 @@ pub trait SSABuilder: Builder {
intermediate_mod: SrcMod::None,
});
dst.into()
dst
}
fn lea64(&mut self, a: Src, b: Src, shift: u8) -> SSARef {
@ -677,17 +677,17 @@ pub trait SSABuilder: Builder {
dst
}
// Diff fragment for `lop2`: the +/- markers are not shown, so each changed
// line appears twice, the pre-change form first and the post-change form
// second. Per the commit title, the change is the return type
// (`SSARef` -> `SSAValue`).
//
// `lop2` allocates a destination, hands it to `lop2_to` together with `op`,
// `x` and `y`, and returns the destination.
// Pre-change signature:
fn lop2(&mut self, op: LogicOp2, x: Src, y: Src) -> SSARef {
// Post-change signature:
fn lop2(&mut self, op: LogicOp2, x: Src, y: Src) -> SSAValue {
// The destination register file follows the first source: `RegFile::Pred`
// when `x.is_predicate()` is true, `RegFile::GPR` otherwise.
let dst = if x.is_predicate() {
self.alloc_ssa(RegFile::Pred)
} else {
self.alloc_ssa(RegFile::GPR)
};
self.lop2_to(dst.into(), op, x, y);
// Pre-change return expression (converted via `.into()`):
dst.into()
// Post-change return expression (the allocated value as-is):
dst
}
fn brev(&mut self, x: Src) -> SSARef {
fn brev(&mut self, x: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
if self.sm() >= 70 {
self.push_op(OpBRev {
@ -704,20 +704,20 @@ pub trait SSABuilder: Builder {
reverse: true,
});
}
dst.into()
dst
}
// Diff fragment for `mufu`: the +/- markers are not shown, so each changed
// line appears twice, the pre-change form first and the post-change form
// second. Per the commit title, the change is the return type
// (`SSARef` -> `SSAValue`).
//
// `mufu` allocates a destination with `alloc_ssa(RegFile::GPR)`, pushes an
// `OpMuFu` carrying `op` and `src` that writes to it, and returns the
// destination.
// Pre-change signature:
fn mufu(&mut self, op: MuFuOp, src: Src) -> SSARef {
// Post-change signature:
fn mufu(&mut self, op: MuFuOp, src: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
self.push_op(OpMuFu {
dst: dst.into(),
op: op,
src: src,
});
// Pre-change return expression (converted via `.into()`):
dst.into()
// Post-change return expression (the allocated value as-is):
dst
}
fn fsin(&mut self, src: Src) -> SSARef {
fn fsin(&mut self, src: Src) -> SSAValue {
let tmp = if self.sm() >= 70 {
let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);
self.fmul(src, frac_1_2pi.into())
@ -733,7 +733,7 @@ pub trait SSABuilder: Builder {
self.mufu(MuFuOp::Sin, tmp.into())
}
fn fcos(&mut self, src: Src) -> SSARef {
fn fcos(&mut self, src: Src) -> SSAValue {
let tmp = if self.sm() >= 70 {
let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);
self.fmul(src, frac_1_2pi.into())
@ -749,7 +749,7 @@ pub trait SSABuilder: Builder {
self.mufu(MuFuOp::Cos, tmp.into())
}
fn fexp2(&mut self, src: Src) -> SSARef {
fn fexp2(&mut self, src: Src) -> SSAValue {
let tmp = if self.sm() >= 70 {
src
} else {
@ -764,13 +764,13 @@ pub trait SSABuilder: Builder {
self.mufu(MuFuOp::Exp2, tmp)
}
// Diff fragment for `prmt`: the +/- markers are not shown, so each changed
// line appears twice, the pre-change form first and the post-change form
// second. Per the commit title, the change is the return type
// (`SSARef` -> `SSAValue`).
//
// `prmt` allocates a destination with `alloc_ssa(RegFile::GPR)`, passes it
// to `prmt_to` along with `x`, `y` and the four-entry selector `sel`, and
// returns the destination.
// Pre-change signature:
fn prmt(&mut self, x: Src, y: Src, sel: [u8; 4]) -> SSARef {
// Post-change signature:
fn prmt(&mut self, x: Src, y: Src, sel: [u8; 4]) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
self.prmt_to(dst.into(), x, y, sel);
// Pre-change return expression (converted via `.into()`):
dst.into()
// Post-change return expression (the allocated value as-is):
dst
}
fn prmt4(&mut self, src: [Src; 4], sel: [u8; 4]) -> SSARef {
fn prmt4(&mut self, src: [Src; 4], sel: [u8; 4]) -> SSAValue {
let max_sel = *sel.iter().max().unwrap();
if max_sel < 8 {
self.prmt(src[0], src[1], sel)
@ -808,7 +808,7 @@ pub trait SSABuilder: Builder {
}
}
fn sel(&mut self, cond: Src, x: Src, y: Src) -> SSARef {
fn sel(&mut self, cond: Src, x: Src, y: Src) -> SSAValue {
assert!(cond.src_ref.is_predicate());
assert!(x.is_predicate() == y.is_predicate());
if x.is_predicate() {
@ -835,7 +835,7 @@ pub trait SSABuilder: Builder {
srcs: [cond.bnot(), y, tmp.into()],
});
}
dst.into()
dst
} else {
let dst = self.alloc_ssa(RegFile::GPR);
self.push_op(OpSel {
@ -843,17 +843,17 @@ pub trait SSABuilder: Builder {
cond: cond,
srcs: [x, y],
});
dst.into()
dst
}
}
// Diff fragment for `undef`: the +/- markers are not shown, so each changed
// line appears twice, the pre-change form first and the post-change form
// second. Per the commit title, the change is the return type
// (`SSARef` -> `SSAValue`).
//
// `undef` allocates a destination with `alloc_ssa(RegFile::GPR)`, pushes an
// `OpUndef` whose only field is that destination, and returns it.
// Pre-change signature:
fn undef(&mut self) -> SSARef {
// Post-change signature:
fn undef(&mut self) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
self.push_op(OpUndef { dst: dst.into() });
// Pre-change return expression (converted via `.into()`):
dst.into()
// Post-change return expression (the allocated value as-is):
dst
}
fn copy(&mut self, src: Src) -> SSARef {
fn copy(&mut self, src: Src) -> SSAValue {
let dst = if src.is_predicate() {
self.alloc_ssa(RegFile::Pred)
} else {
@ -863,7 +863,7 @@ pub trait SSABuilder: Builder {
dst.into()
}
fn bmov_to_bar(&mut self, src: Src) -> SSARef {
fn bmov_to_bar(&mut self, src: Src) -> SSAValue {
assert!(src.src_ref.as_ssa().unwrap().file() == Some(RegFile::GPR));
let dst = self.alloc_ssa(RegFile::Bar);
self.push_op(OpBMov {
@ -871,10 +871,10 @@ pub trait SSABuilder: Builder {
src: src,
clear: false,
});
dst.into()
dst
}
fn bmov_to_gpr(&mut self, src: Src) -> SSARef {
fn bmov_to_gpr(&mut self, src: Src) -> SSAValue {
assert!(src.src_ref.as_ssa().unwrap().file() == Some(RegFile::Bar));
let dst = self.alloc_ssa(RegFile::GPR);
self.push_op(OpBMov {
@ -882,7 +882,7 @@ pub trait SSABuilder: Builder {
src: src,
clear: false,
});
dst.into()
dst
}
}

View file

@ -563,7 +563,7 @@ impl<'a> ShaderFromNir<'a> {
// we'll need a copy in that case. If the copy
// isn't needed, copy-prop should clean it up for
// us.
comps.push(b.copy(ssa.into())[0]);
comps.push(b.copy(ssa.into()));
}
}
8 => {
@ -603,7 +603,7 @@ impl<'a> ShaderFromNir<'a> {
res
};
comps.push(b.prmt4(psrc, psel)[0]);
comps.push(b.prmt4(psrc, psel));
}
}
16 => {
@ -620,7 +620,7 @@ impl<'a> ShaderFromNir<'a> {
psel[w * 2 + 1] = (w as u8 * 4) + byte + 1;
}
}
comps.push(b.prmt(psrc[0], psrc[1], psel)[0]);
comps.push(b.prmt(psrc[0], psrc[1], psel));
}
}
_ => panic!("Unknown bit size: {src_bit_size}"),
@ -706,25 +706,28 @@ impl<'a> ShaderFromNir<'a> {
nir_op_b2b1 => {
assert!(alu.get_src(0).bit_size() == 32);
b.isetp(IntCmpType::I32, IntCmpOp::Ne, srcs[0], 0.into())
.into()
}
nir_op_b2b32 | nir_op_b2i8 | nir_op_b2i16 | nir_op_b2i32 => {
b.sel(srcs[0].bnot(), 0.into(), 1.into())
b.sel(srcs[0].bnot(), 0.into(), 1.into()).into()
}
nir_op_b2i64 => {
let lo = b.sel(srcs[0].bnot(), 0.into(), 1.into());
let hi = b.copy(0.into());
[lo[0], hi[0]].into()
[lo, hi].into()
}
nir_op_b2f16 => {
b.sel(srcs[0].bnot(), 0.into(), 0x3c00.into()).into()
}
nir_op_b2f16 => b.sel(srcs[0].bnot(), 0.into(), 0x3c00.into()),
nir_op_b2f32 => {
b.sel(srcs[0].bnot(), 0.0_f32.into(), 1.0_f32.into())
b.sel(srcs[0].bnot(), 0.0_f32.into(), 1.0_f32.into()).into()
}
nir_op_b2f64 => {
let lo = b.copy(0.into());
let hi = b.sel(srcs[0].bnot(), 0.into(), 0x3ff00000.into());
[lo[0], hi[0]].into()
[lo, hi].into()
}
nir_op_bcsel => b.sel(srcs[0], srcs[1], srcs[2]),
nir_op_bcsel => b.sel(srcs[0], srcs[1], srcs[2]).into(),
nir_op_bfm => {
let dst = b.alloc_ssa(RegFile::GPR);
b.push_op(OpBMsk {
@ -743,7 +746,7 @@ impl<'a> ShaderFromNir<'a> {
});
dst.into()
}
nir_op_bitfield_reverse => b.brev(srcs[0]),
nir_op_bitfield_reverse => b.brev(srcs[0]).into(),
nir_op_ibitfield_extract | nir_op_ubitfield_extract => {
let range = b.alloc_ssa(RegFile::GPR);
b.push_op(OpPrmt {
@ -796,6 +799,7 @@ impl<'a> ShaderFromNir<'a> {
}
_ => panic!("Unknown extract op: {}", alu.op),
}
.into()
}
nir_op_f2f16 | nir_op_f2f16_rtne | nir_op_f2f16_rtz
| nir_op_f2f32 | nir_op_f2f64 => {
@ -960,7 +964,7 @@ impl<'a> ShaderFromNir<'a> {
}
dst.into()
}
nir_op_fcos => b.fcos(srcs[0]),
nir_op_fcos => b.fcos(srcs[0]).into(),
nir_op_feq | nir_op_fge | nir_op_flt | nir_op_fneu => {
let src_type =
FloatType::from_bits(alu.get_src(0).bit_size().into());
@ -1025,7 +1029,7 @@ impl<'a> ShaderFromNir<'a> {
}
dst
}
nir_op_fexp2 => b.fexp2(srcs[0]),
nir_op_fexp2 => b.fexp2(srcs[0]).into(),
nir_op_ffma => {
let ftype = FloatType::from_bits(alu.def.bit_size().into());
let dst;
@ -1091,7 +1095,7 @@ impl<'a> ShaderFromNir<'a> {
}
nir_op_flog2 => {
assert!(alu.def.bit_size() == 32);
b.mufu(MuFuOp::Log2, srcs[0])
b.mufu(MuFuOp::Log2, srcs[0]).into()
}
nir_op_fmax | nir_op_fmin => {
let dst;
@ -1222,22 +1226,23 @@ impl<'a> ShaderFromNir<'a> {
b.lop2(LogicOp2::And, srcs[0], 0x80000000.into());
b.sel(denorm.into(), zero.into(), dst.into())
} else {
dst.into()
dst
}
.into()
}
nir_op_frcp => {
assert!(alu.def.bit_size() == 32);
b.mufu(MuFuOp::Rcp, srcs[0])
b.mufu(MuFuOp::Rcp, srcs[0]).into()
}
nir_op_frsq => {
assert!(alu.def.bit_size() == 32);
b.mufu(MuFuOp::Rsq, srcs[0])
b.mufu(MuFuOp::Rsq, srcs[0]).into()
}
nir_op_fsat => {
let ftype = FloatType::from_bits(alu.def.bit_size().into());
if self.alu_src_is_saturated(&alu.srcs_as_slice()[0]) {
b.copy(srcs[0])
b.copy(srcs[0]).into()
} else if alu.def.bit_size() == 32 {
let dst = b.alloc_ssa(RegFile::GPR);
b.push_op(OpFAdd {
@ -1273,11 +1278,11 @@ impl<'a> ShaderFromNir<'a> {
let hi = b.sel(lz.into(), 0xbff00000.into(), 0.into());
let hi = b.sel(gz.into(), 0x3ff00000.into(), hi.into());
let lo = b.copy(0.into());
[lo[0], hi[0]].into()
[lo, hi].into()
} else if alu.def.bit_size() == 32 {
let lz = b.fset(FloatCmpOp::OrdLt, srcs[0], 0.into());
let gz = b.fset(FloatCmpOp::OrdGt, srcs[0], 0.into());
b.fadd(gz.into(), Src::from(lz).fneg())
b.fadd(gz.into(), Src::from(lz).fneg()).into()
} else if alu.def.bit_size() == 16 {
let x = restrict_f16v2_src(srcs[0]);
@ -1288,13 +1293,13 @@ impl<'a> ShaderFromNir<'a> {
b.hset2(FloatCmpOp::OrdGt, x, 0.into()).into(),
);
b.hadd2(gz, lz.fneg())
b.hadd2(gz, lz.fneg()).into()
} else {
panic!("Unsupported float type: f{}", alu.def.bit_size());
}
}
nir_op_fsin => b.fsin(srcs[0]),
nir_op_fsqrt => b.mufu(MuFuOp::Sqrt, srcs[0]),
nir_op_fsin => b.fsin(srcs[0]).into(),
nir_op_fsqrt => b.mufu(MuFuOp::Sqrt, srcs[0]).into(),
nir_op_i2f16 | nir_op_i2f32 | nir_op_i2f64 => {
let src_bits = alu.get_src(0).src.bit_size();
let dst_bits = alu.def.bit_size();
@ -1347,38 +1352,39 @@ impl<'a> ShaderFromNir<'a> {
if dst_bits == 64 {
*src
} else {
b.prmt(src[0].into(), src[1].into(), prmt_lo)
b.prmt(src[0].into(), src[1].into(), prmt_lo).into()
}
} else {
if dst_bits == 64 {
let lo = b.prmt(src[0].into(), 0.into(), prmt_lo);
let hi = b.prmt(src[0].into(), 0.into(), prmt_hi);
[lo[0], hi[0]].into()
[lo, hi].into()
} else {
b.prmt(src[0].into(), 0.into(), prmt_lo)
b.prmt(src[0].into(), 0.into(), prmt_lo).into()
}
}
}
nir_op_iabs => b.iabs(srcs[0]),
nir_op_iabs => b.iabs(srcs[0]).into(),
nir_op_iadd => match alu.def.bit_size {
32 => b.iadd(srcs[0], srcs[1], 0.into()),
32 => b.iadd(srcs[0], srcs[1], 0.into()).into(),
64 => b.iadd64(srcs[0], srcs[1], 0.into()),
x => panic!("unsupported bit size for nir_op_iadd: {x}"),
},
nir_op_iadd3 => match alu.def.bit_size {
32 => b.iadd(srcs[0], srcs[1], srcs[2]),
32 => b.iadd(srcs[0], srcs[1], srcs[2]).into(),
64 => b.iadd64(srcs[0], srcs[1], srcs[2]),
x => panic!("unsupported bit size for nir_op_iadd3: {x}"),
},
nir_op_iand => b.lop2(LogicOp2::And, srcs[0], srcs[1]),
nir_op_iand => b.lop2(LogicOp2::And, srcs[0], srcs[1]).into(),
nir_op_ieq => {
if alu.get_src(0).bit_size() == 1 {
b.lop2(LogicOp2::Xor, srcs[0], srcs[1].bnot())
b.lop2(LogicOp2::Xor, srcs[0], srcs[1].bnot()).into()
} else if alu.get_src(0).bit_size() == 64 {
b.isetp64(IntCmpType::I32, IntCmpOp::Eq, srcs[0], srcs[1])
} else {
assert!(alu.get_src(0).bit_size() == 32);
b.isetp(IntCmpType::I32, IntCmpOp::Eq, srcs[0], srcs[1])
.into()
}
}
nir_op_ifind_msb | nir_op_ifind_msb_rev | nir_op_ufind_msb
@ -1414,7 +1420,7 @@ impl<'a> ShaderFromNir<'a> {
b.isetp64(cmp_type, cmp_op, x.into(), y.into())
} else {
assert!(alu.get_src(0).bit_size() == 32);
b.isetp(cmp_type, cmp_op, x.into(), y.into())
b.isetp(cmp_type, cmp_op, x.into(), y.into()).into()
}
}
nir_op_imad => {
@ -1436,11 +1442,11 @@ impl<'a> ShaderFromNir<'a> {
_ => panic!("Not an integer min/max"),
};
assert!(alu.def.bit_size() == 32);
b.imnmx(tp, srcs[0], srcs[1], min.into())
b.imnmx(tp, srcs[0], srcs[1], min.into()).into()
}
nir_op_imul => {
assert!(alu.def.bit_size() == 32);
b.imul(srcs[0], srcs[1])
b.imul(srcs[0], srcs[1]).into()
}
nir_op_imul_2x32_64 | nir_op_umul_2x32_64 => {
let signed = alu.op == nir_op_imul_2x32_64;
@ -1453,12 +1459,13 @@ impl<'a> ShaderFromNir<'a> {
}
nir_op_ine => {
if alu.get_src(0).bit_size() == 1 {
b.lop2(LogicOp2::Xor, srcs[0], srcs[1])
b.lop2(LogicOp2::Xor, srcs[0], srcs[1]).into()
} else if alu.get_src(0).bit_size() == 64 {
b.isetp64(IntCmpType::I32, IntCmpOp::Ne, srcs[0], srcs[1])
} else {
assert!(alu.get_src(0).bit_size() == 32);
b.isetp(IntCmpType::I32, IntCmpOp::Ne, srcs[0], srcs[1])
.into()
}
}
nir_op_ineg => {
@ -1466,18 +1473,18 @@ impl<'a> ShaderFromNir<'a> {
b.ineg64(srcs[0])
} else {
assert!(alu.def.bit_size() == 32);
b.ineg(srcs[0])
b.ineg(srcs[0]).into()
}
}
nir_op_inot => {
if alu.def.bit_size() == 1 {
b.lop2(LogicOp2::PassB, true.into(), srcs[0].bnot())
b.lop2(LogicOp2::PassB, true.into(), srcs[0].bnot()).into()
} else {
assert!(alu.def.bit_size() == 32);
b.lop2(LogicOp2::PassB, 0.into(), srcs[0].bnot())
b.lop2(LogicOp2::PassB, 0.into(), srcs[0].bnot()).into()
}
}
nir_op_ior => b.lop2(LogicOp2::Or, srcs[0], srcs[1]),
nir_op_ior => b.lop2(LogicOp2::Or, srcs[0], srcs[1]).into(),
nir_op_ishl => {
if alu.def.bit_size() == 64 {
let shift = if let Some(s) = nir_srcs[1].comp_as_uint(0) {
@ -1488,7 +1495,7 @@ impl<'a> ShaderFromNir<'a> {
b.shl64(srcs[0], shift)
} else {
assert!(alu.def.bit_size() == 32);
b.shl(srcs[0], srcs[1])
b.shl(srcs[0], srcs[1]).into()
}
}
nir_op_ishr => {
@ -1501,7 +1508,7 @@ impl<'a> ShaderFromNir<'a> {
b.shr64(srcs[0], shift, true)
} else {
assert!(alu.def.bit_size() == 32);
b.shr(srcs[0], srcs[1], true)
b.shr(srcs[0], srcs[1], true).into()
}
}
nir_op_lea_nv => {
@ -1509,17 +1516,17 @@ impl<'a> ShaderFromNir<'a> {
let src_b = srcs[0];
let shift = nir_srcs[2].comp_as_uint(0).unwrap() as u8;
match alu.def.bit_size {
32 => b.lea(src_a, src_b, shift),
32 => b.lea(src_a, src_b, shift).into(),
64 => b.lea64(src_a, src_b, shift),
x => panic!("unsupported bit size for nir_op_lea_nv: {x}"),
}
}
nir_op_isub => match alu.def.bit_size {
32 => b.iadd(srcs[0], srcs[1].ineg(), 0.into()),
32 => b.iadd(srcs[0], srcs[1].ineg(), 0.into()).into(),
64 => b.iadd64(srcs[0], srcs[1].ineg(), 0.into()),
x => panic!("unsupported bit size for nir_op_iadd: {x}"),
},
nir_op_ixor => b.lop2(LogicOp2::Xor, srcs[0], srcs[1]),
nir_op_ixor => b.lop2(LogicOp2::Xor, srcs[0], srcs[1]).into(),
nir_op_pack_half_2x16_split | nir_op_pack_half_2x16_rtz_split => {
assert!(alu.get_src(0).bit_size() == 32);
@ -1567,7 +1574,7 @@ impl<'a> ShaderFromNir<'a> {
integer_rnd: false,
});
b.prmt(low.into(), high.into(), [0, 1, 4, 5])
b.prmt(low.into(), high.into(), [0, 1, 4, 5]).into()
}
}
nir_op_prmt_nv => {
@ -1644,10 +1651,10 @@ impl<'a> ShaderFromNir<'a> {
b.sel(ovf_hi.into(), u32::MAX.into(), sum_lo.into());
let hi =
b.sel(ovf_hi.into(), u32::MAX.into(), sum_hi.into());
[lo[0], hi[0]].into()
[lo, hi].into()
} else {
assert!(alu.def.bit_size() == 32);
b.sel(ovf_lo.into(), u32::MAX.into(), sum_lo.into())
b.sel(ovf_lo.into(), u32::MAX.into(), sum_lo.into()).into()
}
}
nir_op_usub_sat => {
@ -1677,25 +1684,25 @@ impl<'a> ShaderFromNir<'a> {
});
let lo = b.sel(ovf_hi.into(), sum_lo.into(), 0.into());
let hi = b.sel(ovf_hi.into(), sum_hi.into(), 0.into());
[lo[0], hi[0]].into()
[lo, hi].into()
} else {
assert!(alu.def.bit_size() == 32);
b.sel(ovf_lo.into(), sum_lo.into(), 0.into())
b.sel(ovf_lo.into(), sum_lo.into(), 0.into()).into()
}
}
nir_op_unpack_32_2x16_split_x => {
b.prmt(srcs[0], 0.into(), [0, 1, 4, 4])
b.prmt(srcs[0], 0.into(), [0, 1, 4, 4]).into()
}
nir_op_unpack_32_2x16_split_y => {
b.prmt(srcs[0], 0.into(), [2, 3, 4, 4])
b.prmt(srcs[0], 0.into(), [2, 3, 4, 4]).into()
}
nir_op_unpack_64_2x32_split_x => {
let src0_x = srcs[0].as_ssa().unwrap()[0];
b.copy(src0_x.into())
b.copy(src0_x.into()).into()
}
nir_op_unpack_64_2x32_split_y => {
let src0_y = srcs[0].as_ssa().unwrap()[1];
b.copy(src0_y.into())
b.copy(src0_y.into()).into()
}
nir_op_unpack_half_2x16_split_x
| nir_op_unpack_half_2x16_split_y => {
@ -1725,7 +1732,7 @@ impl<'a> ShaderFromNir<'a> {
b.shr64(srcs[0], shift, false)
} else {
assert!(alu.def.bit_size() == 32);
b.shr(srcs[0], srcs[1], false)
b.shr(srcs[0], srcs[1], false).into()
}
}
_ => panic!("Unsupported ALU instruction: {}", alu.info().name()),
@ -1952,9 +1959,9 @@ impl<'a> ShaderFromNir<'a> {
let Dst::SSA(fault) = fault else {
panic!("No fault value for sparse op");
};
nir_dst.push(b.sel(fault.into(), 0.into(), 1.into())[0]);
nir_dst.push(b.sel(fault.into(), 0.into(), 1.into()));
} else if mask & (1 << i) == 0 {
nir_dst.push(b.copy(0.into())[0]);
nir_dst.push(b.copy(0.into()));
} else {
nir_dst.push(dst[di]);
di += 1;
@ -2417,7 +2424,7 @@ impl<'a> ShaderFromNir<'a> {
cond: cond.into(),
});
self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()));
self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()).into());
}
nir_intrinsic_bar_set_nv => {
let label = self.label_alloc.alloc();
@ -2437,7 +2444,7 @@ impl<'a> ShaderFromNir<'a> {
target: label,
});
self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()));
self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()).into());
}
nir_intrinsic_bar_sync_nv => {
let src = self.get_src(&srcs[0]);
@ -2601,7 +2608,7 @@ impl<'a> ShaderFromNir<'a> {
for i in 0..usize::from(comps) - 1 {
final_dst.push(dst[i]);
}
final_dst.push(b.sel(fault.into(), 0.into(), 1.into())[0]);
final_dst.push(b.sel(fault.into(), 0.into(), 1.into()));
self.set_ssa(&intrin.def, final_dst);
}
@ -3408,7 +3415,7 @@ impl<'a> ShaderFromNir<'a> {
1 => {
for c in 0..load_const.def.num_components {
let imm_b1 = unsafe { values[usize::from(c)].b };
dst.push(b.copy(imm_b1.into())[0]);
dst.push(b.copy(imm_b1.into()));
}
}
8 => {
@ -3421,7 +3428,7 @@ impl<'a> ShaderFromNir<'a> {
imm_u32 |= u32::from(imm_u8) << b * 8;
}
}
dst.push(b.copy(imm_u32.into())[0]);
dst.push(b.copy(imm_u32.into()));
}
}
16 => {
@ -3435,20 +3442,20 @@ impl<'a> ShaderFromNir<'a> {
imm_u32 |= u32::from(imm_u16) << w * 16;
}
}
dst.push(b.copy(imm_u32.into())[0]);
dst.push(b.copy(imm_u32.into()));
}
}
32 => {
for c in 0..load_const.def.num_components {
let imm_u32 = unsafe { values[usize::from(c)].u32_ };
dst.push(b.copy(imm_u32.into())[0]);
dst.push(b.copy(imm_u32.into()));
}
}
64 => {
for c in 0..load_const.def.num_components {
let imm_u64 = unsafe { values[c as usize].u64_ };
dst.push(b.copy((imm_u64 as u32).into())[0]);
dst.push(b.copy(((imm_u64 >> 32) as u32).into())[0]);
dst.push(b.copy((imm_u64 as u32).into()));
dst.push(b.copy(((imm_u64 >> 32) as u32).into()));
}
}
_ => panic!("Unknown bit size: {}", load_const.def.bit_size),

View file

@ -109,8 +109,8 @@ impl<'a> TestShaderBuilder<'a> {
};
let data_offset = SSARef::from([
b.imul(invoc_id.into(), data_stride.into())[0],
b.copy(0.into())[0],
b.imul(invoc_id.into(), data_stride.into()),
b.copy(0.into()),
]);
let data_addr =
b.iadd64(data_addr.into(), data_offset.into(), 0.into());
@ -122,7 +122,7 @@ impl<'a> TestShaderBuilder<'a> {
invoc_id.into(),
invocations.into(),
);
b.predicate(oob[0].into()).push_op(OpExit {});
b.predicate(oob.into()).push_op(OpExit {});
let start_block = BasicBlock {
label: label_alloc.alloc(),
@ -411,7 +411,7 @@ pub fn test_foldable_op_with(
}
file => panic!("Can't auto-test {file:?} data"),
};
b.st_test_data(comps * 4, MemType::B32, u);
b.st_test_data(comps * 4, MemType::B32, u.into());
comps += 1;
}
}