nak: Move all the IADD3 insanity to a new OpIAdd3X opcode

Because of its crazy behavior around overflow, we don't want the full
IADD3 opcode to support any sort of source modifier propagation.  This
makes us a new OpIAdd3X opcode which contains all the crazy and lets
IAdd3 remain the usual 32-bit integer thing everyone knows and loves.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
Faith Ekstrand 2023-09-05 01:53:52 -05:00 committed by Marge Bot
parent c5b9d42ac2
commit 0222107699
5 changed files with 108 additions and 57 deletions

View file

@ -122,9 +122,7 @@ pub trait SSABuilder: Builder {
let dst = self.alloc_ssa(RegFile::GPR, 1);
self.push_op(OpIAdd3 {
dst: dst.into(),
overflow: Dst::None,
srcs: [Src::new_zero(), x, y],
carry: Src::new_imm_bool(false),
});
dst
}

View file

@ -551,40 +551,41 @@ impl SM75Instr {
}
fn encode_iadd3(&mut self, op: &OpIAdd3) {
/* TODO: This should happen as part of a legalization pass */
assert!(op.srcs[0].is_reg_or_zero());
if op.srcs[2].is_reg_or_zero() {
self.encode_alu(
0x010,
Some(op.dst),
ALUSrc::from_src(&op.srcs[0]),
ALUSrc::from_src(&op.srcs[1]),
ALUSrc::from_src(&op.srcs[2]),
);
} else {
self.encode_alu(
0x010,
Some(op.dst),
ALUSrc::from_src(&op.srcs[0]),
ALUSrc::from_src(&op.srcs[2]),
ALUSrc::from_src(&op.srcs[1]),
);
}
// Hardware requires at least one of these be unmodified
assert!(op.srcs[0].src_mod.is_none() || op.srcs[1].src_mod.is_none());
self.set_pred_dst(81..84, op.overflow);
/* Carry for IADD3 is special because the default (register 7) is false
* instead of the usual true and it doesn't have a not modifier.
*/
assert!(op.carry.src_mod.is_none());
self.set_pred_reg(
84..87,
match op.carry.src_ref {
SrcRef::False => RegRef::new(RegFile::Pred, 7, 1),
SrcRef::Reg(reg) => reg,
_ => panic!("Invalid carry source"),
},
self.encode_alu(
0x010,
Some(op.dst),
ALUSrc::from_src(&op.srcs[0]),
ALUSrc::from_src(&op.srcs[1]),
ALUSrc::from_src(&op.srcs[2]),
);
self.set_pred_dst(81..84, Dst::None);
self.set_pred_dst(84..87, Dst::None);
}
/// Encodes an `OpIAdd3X` using ALU opcode 0x010 (the same opcode
/// `encode_iadd3` emits), then fills in the `.X` bit and the overflow /
/// carry predicate fields that the plain IADD3 encoding leaves unused.
fn encode_iadd3x(&mut self, op: &OpIAdd3X) {
    // Hardware requires at least one of these be unmodified
    assert!(op.srcs[0].src_mod.is_none() || op.srcs[1].src_mod.is_none());

    // Translate the three integer sources in order, then emit the ALU form.
    let [src0, src1, src2] = &op.srcs;
    let alu0 = ALUSrc::from_src(src0);
    let alu1 = ALUSrc::from_src(src1);
    let alu2 = ALUSrc::from_src(src2);
    self.encode_alu(0x010, Some(op.dst), alu0, alu1, alu2);

    // .X
    self.set_bit(74, op.high);

    // Overflow predicate destinations.
    let [overflow0, overflow1] = op.overflow;
    self.set_pred_dst(81..84, overflow0);
    self.set_pred_dst(84..87, overflow1);

    // Carry predicate sources; the lone bit index after each range is
    // presumably the per-source "not" bit -- confirm against set_pred_src.
    let [carry0, carry1] = op.carry;
    self.set_pred_src(87..90, 90, carry0);
    self.set_pred_src(77..80, 80, carry1);
}
fn encode_imad(&mut self, op: &OpIMad) {
@ -1563,6 +1564,7 @@ impl SM75Instr {
Op::MuFu(op) => si.encode_mufu(&op),
Op::IAbs(op) => si.encode_iabs(&op),
Op::IAdd3(op) => si.encode_iadd3(&op),
Op::IAdd3X(op) => si.encode_iadd3x(&op),
Op::IMad(op) => si.encode_imad(&op),
Op::IMad64(op) => si.encode_imad64(&op),
Op::IMnMx(op) => si.encode_imnmx(&op),

View file

@ -445,17 +445,19 @@ impl<'a> ShaderFromNir<'a> {
let y = srcs[1].as_ssa().unwrap();
let sum = b.alloc_ssa(RegFile::GPR, 2);
let carry = b.alloc_ssa(RegFile::Pred, 1);
b.push_op(OpIAdd3 {
b.push_op(OpIAdd3X {
dst: sum[0].into(),
overflow: carry.into(),
overflow: [carry.into(), Dst::None],
high: false,
srcs: [x[0].into(), y[0].into(), Src::new_zero()],
carry: Src::new_imm_bool(false),
carry: [SrcRef::False.into(), SrcRef::False.into()],
});
b.push_op(OpIAdd3 {
b.push_op(OpIAdd3X {
dst: sum[1].into(),
overflow: Dst::None,
overflow: [Dst::None, Dst::None],
high: true,
srcs: [x[1].into(), y[1].into(), Src::new_zero()],
carry: carry.into(),
carry: [carry.into(), SrcRef::False.into()],
});
sum
} else {
@ -634,9 +636,7 @@ impl<'a> ShaderFromNir<'a> {
let gt_neg = b.ineg(gt.into());
b.push_op(OpIAdd3 {
dst: dst.into(),
overflow: Dst::None,
srcs: [lt.into(), gt_neg.into(), Src::new_zero()],
carry: Src::new_imm_bool(false),
});
}
IntType::I64 => {
@ -644,9 +644,7 @@ impl<'a> ShaderFromNir<'a> {
let gt_neg = b.ineg(gt.into());
b.push_op(OpIAdd3 {
dst: high.into(),
overflow: Dst::None,
srcs: [lt.into(), gt_neg.into(), Src::new_zero()],
carry: Src::new_imm_bool(false),
});
b.push_op(OpShf {
dst: dst.into(),

View file

@ -591,6 +591,13 @@ pub enum Dst {
}
impl Dst {
/// Returns `true` when this destination is `Dst::None`, and `false` for
/// every other variant.
pub fn is_none(&self) -> bool {
    matches!(self, Dst::None)
}
pub fn as_reg(&self) -> Option<&RegRef> {
match self {
Dst::Reg(r) => Some(r),
@ -1004,6 +1011,14 @@ impl Src {
}
}
/// Returns `true` when this source is a constant that evaluates to false:
/// either `SrcRef::False` whose modifier is not a boolean-not, or
/// `SrcRef::True` whose modifier is a boolean-not.  Any other source
/// reference yields `false`.
pub fn is_false(&self) -> bool {
    // The modifier is only inspected for the two constant predicate refs.
    match self.src_ref {
        SrcRef::False if !self.src_mod.is_bnot() => true,
        SrcRef::True if self.src_mod.is_bnot() => true,
        _ => false,
    }
}
pub fn is_reg_or_zero(&self) -> bool {
match self.src_ref {
SrcRef::Zero | SrcRef::SSA(_) | SrcRef::Reg(_) => true,
@ -2080,30 +2095,61 @@ impl fmt::Display for OpINeg {
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpIAdd3 {
pub dst: Dst,
pub overflow: Dst,
#[src_type(I32)]
pub srcs: [Src; 3],
#[src_type(Pred)]
pub carry: Src,
}
impl fmt::Display for OpIAdd3 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"IADD3 {{ {} {} }} {{ {}, {}, {}, {} }}",
self.dst,
self.overflow,
self.srcs[0],
self.srcs[1],
self.srcs[2],
self.carry,
"IADD3 {} {{ {}, {}, {} }}",
self.dst, self.srcs[0], self.srcs[1], self.srcs[2],
)
}
}
// Three-source integer add that, unlike OpIAdd3, also exposes overflow
// predicate outputs and carry predicate inputs.  The 64-bit add lowering
// chains two of these: the low half writes overflow[0], and the high half
// consumes that predicate as carry[0].
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpIAdd3X {
// Destination of the sum.
pub dst: Dst,
// Overflow predicate destinations; Dst::None for any that is unused.
pub overflow: [Dst; 2],
// Selects the ".HI" (true) or ".LO" (false) form in the Display output; the
// SM75 encoder writes this flag to the ".X" bit.
pub high: bool,
// The three integer sources being added.
#[src_type(ALU)]
pub srcs: [Src; 3],
// Carry predicate sources; Display only prints them when `high` is set.
#[src_type(Pred)]
pub carry: [Src; 2],
}
impl fmt::Display for OpIAdd3X {
    /// Formats the op as `IADD3.HI`/`IADD3.LO`, followed by the
    /// destination(s) and a brace-delimited source list.
    ///
    /// The overflow predicates are printed (grouped in braces with `dst`)
    /// only when at least one of them is not `Dst::None`.  The carry
    /// sources are appended to the source list only when `high` is set.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let half = if self.high { ".HI " } else { ".LO " };
        write!(f, "IADD3{}", half)?;

        let [overflow0, overflow1] = &self.overflow;
        if !overflow0.is_none() || !overflow1.is_none() {
            write!(f, "{{ {}, {}, {} }} ", self.dst, overflow0, overflow1)?;
        } else {
            write!(f, "{} ", self.dst)?;
        }

        let [src0, src1, src2] = &self.srcs;
        write!(f, "{{ {}, {}, {}", src0, src1, src2)?;
        if self.high {
            let [carry0, carry1] = &self.carry;
            write!(f, ", {}, {}", carry0, carry1)?;
        }
        f.write_str(" }")
    }
}
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpIMad {
@ -3574,6 +3620,7 @@ pub enum Op {
IAbs(OpIAbs),
INeg(OpINeg),
IAdd3(OpIAdd3),
IAdd3X(OpIAdd3X),
IMad(OpIMad),
IMad64(OpIMad64),
IMnMx(OpIMnMx),
@ -3950,6 +3997,7 @@ impl Instr {
| Op::IAbs(_)
| Op::INeg(_)
| Op::IAdd3(_)
| Op::IAdd3X(_)
| Op::IMad(_)
| Op::IMad64(_)
| Op::IMnMx(_)
@ -4238,9 +4286,7 @@ impl Shader {
match instr.op {
Op::INeg(neg) => MappedInstrs::One(Instr::new_boxed(OpIAdd3 {
dst: neg.dst,
overflow: Dst::None,
srcs: [Src::new_zero(), neg.src.ineg(), Src::new_zero()],
carry: Src::new_imm_bool(false),
})),
Op::FSOut(out) => {
let mut pcopy = OpParCopy::new();

View file

@ -111,6 +111,13 @@ fn legalize_instr(b: &mut impl SSABuilder, instr: &mut Instr) {
copy_src_if_not_reg(b, src0, RegFile::GPR);
copy_src_if_not_reg(b, src2, RegFile::GPR);
}
Op::IAdd3X(op) => {
let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
swap_srcs_if_not_reg(src0, src1);
swap_srcs_if_not_reg(src2, src1);
copy_src_if_not_reg(b, src0, RegFile::GPR);
copy_src_if_not_reg(b, src2, RegFile::GPR);
}
Op::IMad(op) => {
let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
swap_srcs_if_not_reg(src0, src1);