nak: add IADD instruction for SM50

For now, we're just using this in place of IAdd3X for 64-bit adds. IADD3
with carry flags is supported on SM50, but it works completely
differently from SM75. Longer-term we'll probably want to emit this in
all of the places that we're currently using IADD3.

We also still need to hook the carry register up to calc_deps; until
that is done, I'm just running with NAK_DEBUG=serial.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26114>
This commit is contained in:
Benjamin Lee 2023-10-24 16:24:04 -07:00 committed by Marge Bot
parent 588cfcaec7
commit 8a82f426a2
5 changed files with 106 additions and 34 deletions

View file

@ -186,11 +186,20 @@ pub trait SSABuilder: Builder {
fn iadd(&mut self, x: Src, y: Src) -> SSARef {
let dst = self.alloc_ssa(RegFile::GPR, 1);
self.push_op(OpIAdd3 {
dst: dst.into(),
srcs: [Src::new_zero(), x, y],
overflow: [Dst::None; 2],
});
if self.sm() >= 70 {
self.push_op(OpIAdd3 {
dst: dst.into(),
srcs: [Src::new_zero(), x, y],
overflow: [Dst::None; 2],
});
} else {
self.push_op(OpIAdd2 {
dst: dst.into(),
srcs: [x, y],
carry_in: false,
carry_out: false,
});
}
dst
}

View file

@ -1614,25 +1614,23 @@ impl SM50Instr {
self.set_field(10..12, (src_type.bits() / 8).ilog2());
}
fn encode_iadd3(&mut self, op: &OpIAdd3) {
fn encode_iadd2(&mut self, op: &OpIAdd2) {
/* TODO: support modifiers with imm32 (bit 56) */
assert!(op.srcs[0].is_reg_or_zero());
let src_modifier = Some(ALUSrcsModifier {
src0_opt: Some(ALUModifierInfo {
abs_bit: None,
neg_bit: Some(51),
neg_bit: Some(49),
}),
src1_opt: Some(ALUModifierInfo {
abs_bit: None,
neg_bit: Some(50),
}),
src2_opt: Some(ALUModifierInfo {
abs_bit: None,
neg_bit: Some(49),
neg_bit: Some(48),
}),
src2_opt: None,
});
let encoding_info = ALUEncodingInfo {
opcode: 0xc0,
opcode: 0x10,
encoding_type: ALUEncodingType::Variant4,
reg_modifier: src_modifier,
imm24_modifier: src_modifier,
@ -1643,13 +1641,21 @@ impl SM50Instr {
}),
};
self.encode_alu(
let is_imm32 = self.encode_alu(
encoding_info,
Some(op.dst),
ALUSrc::from_src(&op.srcs[0]),
ALUSrc::from_src(&op.srcs[1]),
ALUSrc::from_src(&op.srcs[2]),
ALUSrc::None,
);
if is_imm32 {
self.set_bit(53, op.carry_in);
self.set_bit(52, op.carry_out);
} else {
self.set_bit(43, op.carry_in);
self.set_bit(47, op.carry_out);
}
}
fn encode_prmt(&mut self, op: &OpPrmt) {
@ -1698,7 +1704,7 @@ impl SM50Instr {
Op::FSetP(op) => si.encode_fsetp(&op),
Op::MuFu(op) => si.encode_mufu(&op),
Op::IAbs(op) => si.encode_iabs(&op),
Op::IAdd3(op) => si.encode_iadd3(&op),
Op::IAdd2(op) => si.encode_iadd2(&op),
Op::Mov(op) => si.encode_mov(&op),
Op::Sel(op) => si.encode_sel(&op),
Op::PSetP(op) => si.encode_psetp(&op),

View file

@ -757,17 +757,32 @@ impl<'a> ShaderFromNir<'a> {
let y = srcs[1].as_ssa().unwrap();
let sum = b.alloc_ssa(RegFile::GPR, 2);
let carry = b.alloc_ssa(RegFile::Pred, 1);
b.push_op(OpIAdd3 {
dst: sum[0].into(),
overflow: [carry.into(), Dst::None],
srcs: [x[0].into(), y[0].into(), 0.into()],
});
b.push_op(OpIAdd3X {
dst: sum[1].into(),
overflow: [Dst::None, Dst::None],
srcs: [x[1].into(), y[1].into(), 0.into()],
carry: [carry.into(), SrcRef::False.into()],
});
if self.info.sm >= 70 {
b.push_op(OpIAdd3 {
dst: sum[0].into(),
overflow: [carry.into(), Dst::None],
srcs: [x[0].into(), y[0].into(), 0.into()],
});
b.push_op(OpIAdd3X {
dst: sum[1].into(),
overflow: [Dst::None, Dst::None],
srcs: [x[1].into(), y[1].into(), 0.into()],
carry: [carry.into(), SrcRef::False.into()],
});
} else {
b.push_op(OpIAdd2 {
dst: sum[0].into(),
srcs: [x[0].into(), y[0].into()],
carry_out: true,
carry_in: false,
});
b.push_op(OpIAdd2 {
dst: sum[1].into(),
srcs: [x[1].into(), y[1].into()],
carry_out: false,
carry_in: true,
});
}
sum
} else {
assert!(alu.def.bit_size() == 32);

View file

@ -2447,6 +2447,33 @@ impl DisplayOp for OpINeg {
}
impl_display_for_op!(OpINeg);
/// Only used on SM50
///
/// Two-source integer add with an optional carry in and carry out.  The
/// carry is not represented as a source or destination of this op; the
/// two boolean flags below only record whether a carry is consumed and/or
/// produced.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpIAdd2 {
// Destination of the sum.
pub dst: Dst,
// The two addends.
#[src_type(ALU)]
pub srcs: [Src; 2],
// TODO: We should probably track this as an SSA value somehow
// When set, this add produces a carry (printed as the `.cc` suffix).
pub carry_out: bool,
// When set, this add consumes a carry (printed as the `.x` suffix).
pub carry_in: bool,
}
impl DisplayOp for OpIAdd2 {
    /// Prints the `iadd` mnemonic, followed by `.x` when the add consumes
    /// a carry and `.cc` when it produces one, then the two sources
    /// separated by spaces.
    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("iadd")?;

        // Suffix order matters for the printed form: `.x` precedes `.cc`.
        let suffixes = [(self.carry_in, ".x"), (self.carry_out, ".cc")];
        for (enabled, suffix) in suffixes {
            if enabled {
                f.write_str(suffix)?;
            }
        }

        let [lhs, rhs] = &self.srcs;
        write!(f, " {} {}", lhs, rhs)
    }
}
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpIAdd3 {
@ -4407,6 +4434,7 @@ pub enum Op {
Flo(OpFlo),
IAbs(OpIAbs),
INeg(OpINeg),
IAdd2(OpIAdd2),
IAdd3(OpIAdd3),
IAdd3X(OpIAdd3X),
IDp4(OpIDp4),
@ -4842,6 +4870,7 @@ impl Instr {
Op::Brev(_) | Op::Flo(_) | Op::PopC(_) => false,
Op::IAbs(_)
| Op::INeg(_)
| Op::IAdd2(_)
| Op::IAdd3(_)
| Op::IAdd3X(_)
| Op::IDp4(_)
@ -5414,13 +5443,26 @@ impl Shader {
}
pub fn lower_ineg(&mut self) {
self.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs {
let sm = self.info.sm;
self.map_instrs(|mut instr: Box<Instr>, _| -> MappedInstrs {
match instr.op {
Op::INeg(neg) => MappedInstrs::One(Instr::new_boxed(OpIAdd3 {
dst: neg.dst,
overflow: [Dst::None; 2],
srcs: [Src::new_zero(), neg.src.ineg(), Src::new_zero()],
})),
Op::INeg(neg) => {
if sm >= 75 {
instr.op = Op::IAdd3(OpIAdd3 {
dst: neg.dst,
overflow: [Dst::None; 2],
srcs: [0.into(), neg.src.ineg(), 0.into()],
});
} else {
instr.op = Op::IAdd2(OpIAdd2 {
dst: neg.dst,
srcs: [0.into(), neg.src.ineg()],
carry_in: false,
carry_out: false,
});
}
MappedInstrs::One(instr)
}
_ => MappedInstrs::One(instr),
}
})

View file

@ -122,7 +122,7 @@ fn legalize_sm50_instr(
Op::Sel(op) => {
copy_src_if_not_reg(b, &mut op.srcs[1], RegFile::GPR);
}
Op::IAdd3(op) => {
Op::IAdd2(op) => {
copy_src_if_not_reg(b, &mut op.srcs[1], RegFile::GPR);
}
Op::I2F(op) => {