mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 06:58:05 +02:00
nak: Move all the IADD3 insanity to a new OpIAdd3X opcode
Because of its crazy behavior around overflow, we don't want the full IADD3 opcode to support any sort of source modifier propagation. This adds a new OpIAdd3X opcode which contains all the craziness and lets IAdd3 remain the usual 32-bit integer thing everyone knows and loves.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
parent
c5b9d42ac2
commit
0222107699
5 changed files with 108 additions and 57 deletions
|
|
@ -122,9 +122,7 @@ pub trait SSABuilder: Builder {
|
|||
let dst = self.alloc_ssa(RegFile::GPR, 1);
|
||||
self.push_op(OpIAdd3 {
|
||||
dst: dst.into(),
|
||||
overflow: Dst::None,
|
||||
srcs: [Src::new_zero(), x, y],
|
||||
carry: Src::new_imm_bool(false),
|
||||
});
|
||||
dst
|
||||
}
|
||||
|
|
|
|||
|
|
@ -551,40 +551,41 @@ impl SM75Instr {
|
|||
}
|
||||
|
||||
fn encode_iadd3(&mut self, op: &OpIAdd3) {
|
||||
/* TODO: This should happen as part of a legalization pass */
|
||||
assert!(op.srcs[0].is_reg_or_zero());
|
||||
if op.srcs[2].is_reg_or_zero() {
|
||||
self.encode_alu(
|
||||
0x010,
|
||||
Some(op.dst),
|
||||
ALUSrc::from_src(&op.srcs[0]),
|
||||
ALUSrc::from_src(&op.srcs[1]),
|
||||
ALUSrc::from_src(&op.srcs[2]),
|
||||
);
|
||||
} else {
|
||||
self.encode_alu(
|
||||
0x010,
|
||||
Some(op.dst),
|
||||
ALUSrc::from_src(&op.srcs[0]),
|
||||
ALUSrc::from_src(&op.srcs[2]),
|
||||
ALUSrc::from_src(&op.srcs[1]),
|
||||
);
|
||||
}
|
||||
// Hardware requires at least one of these be unmodified
|
||||
assert!(op.srcs[0].src_mod.is_none() || op.srcs[1].src_mod.is_none());
|
||||
|
||||
self.set_pred_dst(81..84, op.overflow);
|
||||
|
||||
/* Carry for IADD3 is special because the default (register 7) is false
|
||||
* instead of the usual true and it doesn't have a not modifier.
|
||||
*/
|
||||
assert!(op.carry.src_mod.is_none());
|
||||
self.set_pred_reg(
|
||||
84..87,
|
||||
match op.carry.src_ref {
|
||||
SrcRef::False => RegRef::new(RegFile::Pred, 7, 1),
|
||||
SrcRef::Reg(reg) => reg,
|
||||
_ => panic!("Invalid carry source"),
|
||||
},
|
||||
self.encode_alu(
|
||||
0x010,
|
||||
Some(op.dst),
|
||||
ALUSrc::from_src(&op.srcs[0]),
|
||||
ALUSrc::from_src(&op.srcs[1]),
|
||||
ALUSrc::from_src(&op.srcs[2]),
|
||||
);
|
||||
|
||||
self.set_pred_dst(81..84, Dst::None);
|
||||
self.set_pred_dst(84..87, Dst::None);
|
||||
}
|
||||
|
||||
fn encode_iadd3x(&mut self, op: &OpIAdd3X) {
|
||||
// Hardware requires at least one of these be unmodified
|
||||
assert!(op.srcs[0].src_mod.is_none() || op.srcs[1].src_mod.is_none());
|
||||
|
||||
self.encode_alu(
|
||||
0x010,
|
||||
Some(op.dst),
|
||||
ALUSrc::from_src(&op.srcs[0]),
|
||||
ALUSrc::from_src(&op.srcs[1]),
|
||||
ALUSrc::from_src(&op.srcs[2]),
|
||||
);
|
||||
|
||||
// .X
|
||||
self.set_bit(74, op.high);
|
||||
|
||||
self.set_pred_dst(81..84, op.overflow[0]);
|
||||
self.set_pred_dst(84..87, op.overflow[1]);
|
||||
|
||||
self.set_pred_src(87..90, 90, op.carry[0]);
|
||||
self.set_pred_src(77..80, 80, op.carry[1]);
|
||||
}
|
||||
|
||||
fn encode_imad(&mut self, op: &OpIMad) {
|
||||
|
|
@ -1563,6 +1564,7 @@ impl SM75Instr {
|
|||
Op::MuFu(op) => si.encode_mufu(&op),
|
||||
Op::IAbs(op) => si.encode_iabs(&op),
|
||||
Op::IAdd3(op) => si.encode_iadd3(&op),
|
||||
Op::IAdd3X(op) => si.encode_iadd3x(&op),
|
||||
Op::IMad(op) => si.encode_imad(&op),
|
||||
Op::IMad64(op) => si.encode_imad64(&op),
|
||||
Op::IMnMx(op) => si.encode_imnmx(&op),
|
||||
|
|
|
|||
|
|
@ -445,17 +445,19 @@ impl<'a> ShaderFromNir<'a> {
|
|||
let y = srcs[1].as_ssa().unwrap();
|
||||
let sum = b.alloc_ssa(RegFile::GPR, 2);
|
||||
let carry = b.alloc_ssa(RegFile::Pred, 1);
|
||||
b.push_op(OpIAdd3 {
|
||||
b.push_op(OpIAdd3X {
|
||||
dst: sum[0].into(),
|
||||
overflow: carry.into(),
|
||||
overflow: [carry.into(), Dst::None],
|
||||
high: false,
|
||||
srcs: [x[0].into(), y[0].into(), Src::new_zero()],
|
||||
carry: Src::new_imm_bool(false),
|
||||
carry: [SrcRef::False.into(), SrcRef::False.into()],
|
||||
});
|
||||
b.push_op(OpIAdd3 {
|
||||
b.push_op(OpIAdd3X {
|
||||
dst: sum[1].into(),
|
||||
overflow: Dst::None,
|
||||
overflow: [Dst::None, Dst::None],
|
||||
high: true,
|
||||
srcs: [x[1].into(), y[1].into(), Src::new_zero()],
|
||||
carry: carry.into(),
|
||||
carry: [carry.into(), SrcRef::False.into()],
|
||||
});
|
||||
sum
|
||||
} else {
|
||||
|
|
@ -634,9 +636,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
let gt_neg = b.ineg(gt.into());
|
||||
b.push_op(OpIAdd3 {
|
||||
dst: dst.into(),
|
||||
overflow: Dst::None,
|
||||
srcs: [lt.into(), gt_neg.into(), Src::new_zero()],
|
||||
carry: Src::new_imm_bool(false),
|
||||
});
|
||||
}
|
||||
IntType::I64 => {
|
||||
|
|
@ -644,9 +644,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
let gt_neg = b.ineg(gt.into());
|
||||
b.push_op(OpIAdd3 {
|
||||
dst: high.into(),
|
||||
overflow: Dst::None,
|
||||
srcs: [lt.into(), gt_neg.into(), Src::new_zero()],
|
||||
carry: Src::new_imm_bool(false),
|
||||
});
|
||||
b.push_op(OpShf {
|
||||
dst: dst.into(),
|
||||
|
|
|
|||
|
|
@ -591,6 +591,13 @@ pub enum Dst {
|
|||
}
|
||||
|
||||
impl Dst {
|
||||
pub fn is_none(&self) -> bool {
|
||||
match self {
|
||||
Dst::None => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_reg(&self) -> Option<&RegRef> {
|
||||
match self {
|
||||
Dst::Reg(r) => Some(r),
|
||||
|
|
@ -1004,6 +1011,14 @@ impl Src {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn is_false(&self) -> bool {
|
||||
match self.src_ref {
|
||||
SrcRef::True => self.src_mod.is_bnot(),
|
||||
SrcRef::False => !self.src_mod.is_bnot(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_reg_or_zero(&self) -> bool {
|
||||
match self.src_ref {
|
||||
SrcRef::Zero | SrcRef::SSA(_) | SrcRef::Reg(_) => true,
|
||||
|
|
@ -2080,30 +2095,61 @@ impl fmt::Display for OpINeg {
|
|||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpIAdd3 {
|
||||
pub dst: Dst,
|
||||
pub overflow: Dst,
|
||||
|
||||
#[src_type(I32)]
|
||||
pub srcs: [Src; 3],
|
||||
|
||||
#[src_type(Pred)]
|
||||
pub carry: Src,
|
||||
}
|
||||
|
||||
impl fmt::Display for OpIAdd3 {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"IADD3 {{ {} {} }} {{ {}, {}, {}, {} }}",
|
||||
self.dst,
|
||||
self.overflow,
|
||||
self.srcs[0],
|
||||
self.srcs[1],
|
||||
self.srcs[2],
|
||||
self.carry,
|
||||
"IADD3 {} {{ {}, {}, {} }}",
|
||||
self.dst, self.srcs[0], self.srcs[1], self.srcs[2],
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Variant of the three-source integer add that exposes the overflow
/// outputs and carry inputs which plain `OpIAdd3` does not carry.
///
/// The NIR translation in this change emits it in pairs to build a 64-bit
/// add: a `high: false` op producing a carry predicate, followed by a
/// `high: true` op consuming it.
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpIAdd3X {
|
||||
/// Destination of the sum.
pub dst: Dst,
|
||||
/// Overflow outputs; the SM75 encoder writes them as predicate
/// destinations.
pub overflow: [Dst; 2],
|
||||
|
||||
/// Sets the `.X` bit (bit 74) in the SM75 encoding; printed as `.HI`
/// when true and `.LO` when false.
pub high: bool,
|
||||
|
||||
/// The three addends.
#[src_type(ALU)]
|
||||
pub srcs: [Src; 3],
|
||||
|
||||
/// Carry inputs, typed as predicates. The Display impl only prints them
/// when `high` is set.
#[src_type(Pred)]
|
||||
pub carry: [Src; 2],
|
||||
}
|
||||
|
||||
impl fmt::Display for OpIAdd3X {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "IADD3")?;
|
||||
if self.high {
|
||||
write!(f, ".HI ")?;
|
||||
} else {
|
||||
write!(f, ".LO ")?;
|
||||
}
|
||||
if self.overflow[0].is_none() && self.overflow[1].is_none() {
|
||||
write!(f, "{} ", self.dst)?;
|
||||
} else {
|
||||
write!(
|
||||
f,
|
||||
"{{ {}, {}, {} }} ",
|
||||
self.dst, self.overflow[0], self.overflow[1],
|
||||
)?;
|
||||
}
|
||||
write!(f, "{{ {}, {}, {}", self.srcs[0], self.srcs[1], self.srcs[2])?;
|
||||
if self.high {
|
||||
write!(f, ", {}, {}", self.carry[0], self.carry[1])?;
|
||||
}
|
||||
write!(f, " }}")
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpIMad {
|
||||
|
|
@ -3574,6 +3620,7 @@ pub enum Op {
|
|||
IAbs(OpIAbs),
|
||||
INeg(OpINeg),
|
||||
IAdd3(OpIAdd3),
|
||||
IAdd3X(OpIAdd3X),
|
||||
IMad(OpIMad),
|
||||
IMad64(OpIMad64),
|
||||
IMnMx(OpIMnMx),
|
||||
|
|
@ -3950,6 +3997,7 @@ impl Instr {
|
|||
| Op::IAbs(_)
|
||||
| Op::INeg(_)
|
||||
| Op::IAdd3(_)
|
||||
| Op::IAdd3X(_)
|
||||
| Op::IMad(_)
|
||||
| Op::IMad64(_)
|
||||
| Op::IMnMx(_)
|
||||
|
|
@ -4238,9 +4286,7 @@ impl Shader {
|
|||
match instr.op {
|
||||
Op::INeg(neg) => MappedInstrs::One(Instr::new_boxed(OpIAdd3 {
|
||||
dst: neg.dst,
|
||||
overflow: Dst::None,
|
||||
srcs: [Src::new_zero(), neg.src.ineg(), Src::new_zero()],
|
||||
carry: Src::new_imm_bool(false),
|
||||
})),
|
||||
Op::FSOut(out) => {
|
||||
let mut pcopy = OpParCopy::new();
|
||||
|
|
|
|||
|
|
@ -111,6 +111,13 @@ fn legalize_instr(b: &mut impl SSABuilder, instr: &mut Instr) {
|
|||
copy_src_if_not_reg(b, src0, RegFile::GPR);
|
||||
copy_src_if_not_reg(b, src2, RegFile::GPR);
|
||||
}
|
||||
Op::IAdd3X(op) => {
|
||||
let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
|
||||
swap_srcs_if_not_reg(src0, src1);
|
||||
swap_srcs_if_not_reg(src2, src1);
|
||||
copy_src_if_not_reg(b, src0, RegFile::GPR);
|
||||
copy_src_if_not_reg(b, src2, RegFile::GPR);
|
||||
}
|
||||
Op::IMad(op) => {
|
||||
let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
|
||||
swap_srcs_if_not_reg(src0, src1);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue