mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-19 01:40:43 +01:00
nak: add IADD instruction for SM50
For now, we're just using this in place of IAdd3X for 64-bit adds. IADD3 with carry flags is supported on SM50, but it works completely differently from SM75. Longer-term we'll probably want to emit this in all of the places that we're currently using IADD3. We also need to hook the carry register up to calc_deps, but for now I'm just using NAK_DEBUG=serial.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26114>
This commit is contained in:
parent
588cfcaec7
commit
8a82f426a2
5 changed files with 106 additions and 34 deletions
|
|
@ -186,11 +186,20 @@ pub trait SSABuilder: Builder {
|
|||
|
||||
fn iadd(&mut self, x: Src, y: Src) -> SSARef {
|
||||
let dst = self.alloc_ssa(RegFile::GPR, 1);
|
||||
self.push_op(OpIAdd3 {
|
||||
dst: dst.into(),
|
||||
srcs: [Src::new_zero(), x, y],
|
||||
overflow: [Dst::None; 2],
|
||||
});
|
||||
if self.sm() >= 70 {
|
||||
self.push_op(OpIAdd3 {
|
||||
dst: dst.into(),
|
||||
srcs: [Src::new_zero(), x, y],
|
||||
overflow: [Dst::None; 2],
|
||||
});
|
||||
} else {
|
||||
self.push_op(OpIAdd2 {
|
||||
dst: dst.into(),
|
||||
srcs: [x, y],
|
||||
carry_in: false,
|
||||
carry_out: false,
|
||||
});
|
||||
}
|
||||
dst
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1614,25 +1614,23 @@ impl SM50Instr {
|
|||
self.set_field(10..12, (src_type.bits() / 8).ilog2());
|
||||
}
|
||||
|
||||
fn encode_iadd3(&mut self, op: &OpIAdd3) {
|
||||
fn encode_iadd2(&mut self, op: &OpIAdd2) {
|
||||
/* TODO: support modifiers with imm32 (bit 56) */
|
||||
assert!(op.srcs[0].is_reg_or_zero());
|
||||
|
||||
let src_modifier = Some(ALUSrcsModifier {
|
||||
src0_opt: Some(ALUModifierInfo {
|
||||
abs_bit: None,
|
||||
neg_bit: Some(51),
|
||||
neg_bit: Some(49),
|
||||
}),
|
||||
src1_opt: Some(ALUModifierInfo {
|
||||
abs_bit: None,
|
||||
neg_bit: Some(50),
|
||||
}),
|
||||
src2_opt: Some(ALUModifierInfo {
|
||||
abs_bit: None,
|
||||
neg_bit: Some(49),
|
||||
neg_bit: Some(48),
|
||||
}),
|
||||
src2_opt: None,
|
||||
});
|
||||
let encoding_info = ALUEncodingInfo {
|
||||
opcode: 0xc0,
|
||||
opcode: 0x10,
|
||||
encoding_type: ALUEncodingType::Variant4,
|
||||
reg_modifier: src_modifier,
|
||||
imm24_modifier: src_modifier,
|
||||
|
|
@ -1643,13 +1641,21 @@ impl SM50Instr {
|
|||
}),
|
||||
};
|
||||
|
||||
self.encode_alu(
|
||||
let is_imm32 = self.encode_alu(
|
||||
encoding_info,
|
||||
Some(op.dst),
|
||||
ALUSrc::from_src(&op.srcs[0]),
|
||||
ALUSrc::from_src(&op.srcs[1]),
|
||||
ALUSrc::from_src(&op.srcs[2]),
|
||||
ALUSrc::None,
|
||||
);
|
||||
|
||||
if is_imm32 {
|
||||
self.set_bit(53, op.carry_in);
|
||||
self.set_bit(52, op.carry_out);
|
||||
} else {
|
||||
self.set_bit(43, op.carry_in);
|
||||
self.set_bit(47, op.carry_out);
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_prmt(&mut self, op: &OpPrmt) {
|
||||
|
|
@ -1698,7 +1704,7 @@ impl SM50Instr {
|
|||
Op::FSetP(op) => si.encode_fsetp(&op),
|
||||
Op::MuFu(op) => si.encode_mufu(&op),
|
||||
Op::IAbs(op) => si.encode_iabs(&op),
|
||||
Op::IAdd3(op) => si.encode_iadd3(&op),
|
||||
Op::IAdd2(op) => si.encode_iadd2(&op),
|
||||
Op::Mov(op) => si.encode_mov(&op),
|
||||
Op::Sel(op) => si.encode_sel(&op),
|
||||
Op::PSetP(op) => si.encode_psetp(&op),
|
||||
|
|
|
|||
|
|
@ -757,17 +757,32 @@ impl<'a> ShaderFromNir<'a> {
|
|||
let y = srcs[1].as_ssa().unwrap();
|
||||
let sum = b.alloc_ssa(RegFile::GPR, 2);
|
||||
let carry = b.alloc_ssa(RegFile::Pred, 1);
|
||||
b.push_op(OpIAdd3 {
|
||||
dst: sum[0].into(),
|
||||
overflow: [carry.into(), Dst::None],
|
||||
srcs: [x[0].into(), y[0].into(), 0.into()],
|
||||
});
|
||||
b.push_op(OpIAdd3X {
|
||||
dst: sum[1].into(),
|
||||
overflow: [Dst::None, Dst::None],
|
||||
srcs: [x[1].into(), y[1].into(), 0.into()],
|
||||
carry: [carry.into(), SrcRef::False.into()],
|
||||
});
|
||||
if self.info.sm >= 70 {
|
||||
b.push_op(OpIAdd3 {
|
||||
dst: sum[0].into(),
|
||||
overflow: [carry.into(), Dst::None],
|
||||
srcs: [x[0].into(), y[0].into(), 0.into()],
|
||||
});
|
||||
b.push_op(OpIAdd3X {
|
||||
dst: sum[1].into(),
|
||||
overflow: [Dst::None, Dst::None],
|
||||
srcs: [x[1].into(), y[1].into(), 0.into()],
|
||||
carry: [carry.into(), SrcRef::False.into()],
|
||||
});
|
||||
} else {
|
||||
b.push_op(OpIAdd2 {
|
||||
dst: sum[0].into(),
|
||||
srcs: [x[0].into(), y[0].into()],
|
||||
carry_out: true,
|
||||
carry_in: false,
|
||||
});
|
||||
b.push_op(OpIAdd2 {
|
||||
dst: sum[1].into(),
|
||||
srcs: [x[1].into(), y[1].into()],
|
||||
carry_out: false,
|
||||
carry_in: true,
|
||||
});
|
||||
}
|
||||
sum
|
||||
} else {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
|
|
|
|||
|
|
@ -2447,6 +2447,33 @@ impl DisplayOp for OpINeg {
|
|||
}
|
||||
impl_display_for_op!(OpINeg);
|
||||
|
||||
/// Only used on SM50
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpIAdd2 {
|
||||
pub dst: Dst,
|
||||
|
||||
#[src_type(ALU)]
|
||||
pub srcs: [Src; 2],
|
||||
|
||||
// TODO: We should probably track this as an SSA value somehow
|
||||
pub carry_out: bool,
|
||||
pub carry_in: bool,
|
||||
}
|
||||
|
||||
impl DisplayOp for OpIAdd2 {
|
||||
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "iadd")?;
|
||||
if self.carry_in {
|
||||
write!(f, ".x")?;
|
||||
}
|
||||
if self.carry_out {
|
||||
write!(f, ".cc")?;
|
||||
}
|
||||
write!(f, " {} {}", self.srcs[0], self.srcs[1])
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpIAdd3 {
|
||||
|
|
@ -4407,6 +4434,7 @@ pub enum Op {
|
|||
Flo(OpFlo),
|
||||
IAbs(OpIAbs),
|
||||
INeg(OpINeg),
|
||||
IAdd2(OpIAdd2),
|
||||
IAdd3(OpIAdd3),
|
||||
IAdd3X(OpIAdd3X),
|
||||
IDp4(OpIDp4),
|
||||
|
|
@ -4842,6 +4870,7 @@ impl Instr {
|
|||
Op::Brev(_) | Op::Flo(_) | Op::PopC(_) => false,
|
||||
Op::IAbs(_)
|
||||
| Op::INeg(_)
|
||||
| Op::IAdd2(_)
|
||||
| Op::IAdd3(_)
|
||||
| Op::IAdd3X(_)
|
||||
| Op::IDp4(_)
|
||||
|
|
@ -5414,13 +5443,26 @@ impl Shader {
|
|||
}
|
||||
|
||||
pub fn lower_ineg(&mut self) {
|
||||
self.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs {
|
||||
let sm = self.info.sm;
|
||||
self.map_instrs(|mut instr: Box<Instr>, _| -> MappedInstrs {
|
||||
match instr.op {
|
||||
Op::INeg(neg) => MappedInstrs::One(Instr::new_boxed(OpIAdd3 {
|
||||
dst: neg.dst,
|
||||
overflow: [Dst::None; 2],
|
||||
srcs: [Src::new_zero(), neg.src.ineg(), Src::new_zero()],
|
||||
})),
|
||||
Op::INeg(neg) => {
|
||||
if sm >= 75 {
|
||||
instr.op = Op::IAdd3(OpIAdd3 {
|
||||
dst: neg.dst,
|
||||
overflow: [Dst::None; 2],
|
||||
srcs: [0.into(), neg.src.ineg(), 0.into()],
|
||||
});
|
||||
} else {
|
||||
instr.op = Op::IAdd2(OpIAdd2 {
|
||||
dst: neg.dst,
|
||||
srcs: [0.into(), neg.src.ineg()],
|
||||
carry_in: false,
|
||||
carry_out: false,
|
||||
});
|
||||
}
|
||||
MappedInstrs::One(instr)
|
||||
}
|
||||
_ => MappedInstrs::One(instr),
|
||||
}
|
||||
})
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ fn legalize_sm50_instr(
|
|||
Op::Sel(op) => {
|
||||
copy_src_if_not_reg(b, &mut op.srcs[1], RegFile::GPR);
|
||||
}
|
||||
Op::IAdd3(op) => {
|
||||
Op::IAdd2(op) => {
|
||||
copy_src_if_not_reg(b, &mut op.srcs[1], RegFile::GPR);
|
||||
}
|
||||
Op::I2F(op) => {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue