mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-28 06:00:10 +01:00
nak: implement SHL and SHR on SM50
SHF.{L,R} is supported, but it seems to always write 0 to dst when the
shift value is a register. The only case in nak_from_nir that actually
uses the 64-bit shift is nir_op_isign, which has an immediate shift
value.
This also avoids the SHF.I32 issue, since the only usage is now SHF.I64.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26114>
This commit is contained in:
parent
286b832f74
commit
00be041ffc
5 changed files with 176 additions and 37 deletions
|
|
@ -121,6 +121,55 @@ pub trait Builder {
|
|||
pub trait SSABuilder: Builder {
|
||||
fn alloc_ssa(&mut self, file: RegFile, comps: u8) -> SSARef;
|
||||
|
||||
fn shl(&mut self, x: Src, shift: Src) -> SSARef {
|
||||
let dst = self.alloc_ssa(RegFile::GPR, 1);
|
||||
if self.sm() >= 70 {
|
||||
self.push_op(OpShf {
|
||||
dst: dst.into(),
|
||||
low: x,
|
||||
high: 0.into(),
|
||||
shift: shift,
|
||||
right: false,
|
||||
wrap: false,
|
||||
data_type: IntType::I32,
|
||||
dst_high: false,
|
||||
});
|
||||
} else {
|
||||
self.push_op(OpShl {
|
||||
dst: dst.into(),
|
||||
src: x,
|
||||
shift: shift,
|
||||
wrap: false,
|
||||
});
|
||||
}
|
||||
dst
|
||||
}
|
||||
|
||||
fn shr(&mut self, x: Src, shift: Src, signed: bool) -> SSARef {
|
||||
let dst = self.alloc_ssa(RegFile::GPR, 1);
|
||||
if self.sm() >= 70 {
|
||||
self.push_op(OpShf {
|
||||
dst: dst.into(),
|
||||
low: 0.into(),
|
||||
high: x,
|
||||
shift: shift,
|
||||
right: true,
|
||||
wrap: false,
|
||||
data_type: if signed { IntType::I32 } else { IntType::U32 },
|
||||
dst_high: true,
|
||||
});
|
||||
} else {
|
||||
self.push_op(OpShr {
|
||||
dst: dst.into(),
|
||||
src: x,
|
||||
shift: shift,
|
||||
wrap: false,
|
||||
signed,
|
||||
});
|
||||
}
|
||||
dst
|
||||
}
|
||||
|
||||
fn fadd(&mut self, x: Src, y: Src) -> SSARef {
|
||||
let dst = self.alloc_ssa(RegFile::GPR, 1);
|
||||
self.push_op(OpFAdd {
|
||||
|
|
|
|||
|
|
@ -544,6 +544,51 @@ impl SM50Instr {
|
|||
self.set_bit(50, op.wrap);
|
||||
}
|
||||
|
||||
fn encode_shl(&mut self, op: &OpShl) {
|
||||
self.set_dst(op.dst);
|
||||
self.set_reg_src(8..16, op.src);
|
||||
match op.shift.src_ref {
|
||||
SrcRef::Zero | SrcRef::Reg(_) => {
|
||||
self.set_opcode(0x5c48);
|
||||
self.set_reg_src(20..28, op.shift);
|
||||
}
|
||||
SrcRef::Imm32(i) => {
|
||||
self.set_opcode(0x3848);
|
||||
self.set_src_imm_i20(20..39, 56, i);
|
||||
}
|
||||
SrcRef::CBuf(cb) => {
|
||||
self.set_opcode(0x4c48);
|
||||
self.set_src_cb(20..39, &cb);
|
||||
}
|
||||
src1 => panic!("unsupported src1 type for SHL: {src1}"),
|
||||
}
|
||||
|
||||
self.set_bit(39, op.wrap);
|
||||
}
|
||||
|
||||
fn encode_shr(&mut self, op: &OpShr) {
|
||||
self.set_dst(op.dst);
|
||||
self.set_reg_src(8..16, op.src);
|
||||
match op.shift.src_ref {
|
||||
SrcRef::Zero | SrcRef::Reg(_) => {
|
||||
self.set_opcode(0x5c28);
|
||||
self.set_reg_src(20..28, op.shift);
|
||||
}
|
||||
SrcRef::Imm32(i) => {
|
||||
self.set_opcode(0x3828);
|
||||
self.set_src_imm_i20(20..39, 56, i);
|
||||
}
|
||||
SrcRef::CBuf(cb) => {
|
||||
self.set_opcode(0x4c28);
|
||||
self.set_src_cb(20..39, &cb);
|
||||
}
|
||||
src1 => panic!("unsupported src1 type for SHL: {src1}"),
|
||||
}
|
||||
|
||||
self.set_bit(39, op.wrap);
|
||||
self.set_bit(48, op.signed);
|
||||
}
|
||||
|
||||
fn encode_i2f(&mut self, op: &OpI2F) {
|
||||
let abs_bit = 49;
|
||||
let neg_bit = 45;
|
||||
|
|
@ -1604,6 +1649,8 @@ impl SM50Instr {
|
|||
Op::St(op) => si.encode_st(&op),
|
||||
Op::Lop2(op) => si.encode_lop2(&op),
|
||||
Op::Shf(op) => si.encode_shf(&op),
|
||||
Op::Shl(op) => si.encode_shl(&op),
|
||||
Op::Shr(op) => si.encode_shr(&op),
|
||||
Op::F2F(op) => si.encode_f2f(&op),
|
||||
Op::F2I(op) => si.encode_f2i(&op),
|
||||
Op::I2F(op) => si.encode_i2f(&op),
|
||||
|
|
|
|||
|
|
@ -899,18 +899,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
dst
|
||||
} else {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 1);
|
||||
b.push_op(OpShf {
|
||||
dst: dst.into(),
|
||||
low: x.into(),
|
||||
high: 0.into(),
|
||||
shift: shift,
|
||||
right: false,
|
||||
wrap: true,
|
||||
data_type: IntType::U32,
|
||||
dst_high: false,
|
||||
});
|
||||
dst
|
||||
b.shl(srcs[0], srcs[1])
|
||||
}
|
||||
}
|
||||
nir_op_ishr => {
|
||||
|
|
@ -944,18 +933,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
dst
|
||||
} else {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 1);
|
||||
b.push_op(OpShf {
|
||||
dst: dst.into(),
|
||||
low: 0.into(),
|
||||
high: x.into(),
|
||||
shift: shift,
|
||||
right: true,
|
||||
wrap: true,
|
||||
data_type: IntType::I32,
|
||||
dst_high: true,
|
||||
});
|
||||
dst
|
||||
b.shr(srcs[0], srcs[1], true)
|
||||
}
|
||||
}
|
||||
nir_op_ixor => b.lop2(LogicOp2::Xor, srcs[0], srcs[1]),
|
||||
|
|
@ -1153,18 +1131,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
dst
|
||||
} else {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 1);
|
||||
b.push_op(OpShf {
|
||||
dst: dst.into(),
|
||||
low: x.into(),
|
||||
high: 0.into(),
|
||||
shift: shift,
|
||||
right: true,
|
||||
wrap: true,
|
||||
data_type: IntType::U32,
|
||||
dst_high: false,
|
||||
});
|
||||
dst
|
||||
b.shr(srcs[0], srcs[1], false)
|
||||
}
|
||||
}
|
||||
nir_op_fddx | nir_op_fddx_coarse | nir_op_fddx_fine => {
|
||||
|
|
|
|||
|
|
@ -2816,6 +2816,60 @@ impl DisplayOp for OpShf {
|
|||
}
|
||||
impl_display_for_op!(OpShf);
|
||||
|
||||
/// Only used on SM50
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpShl {
|
||||
pub dst: Dst,
|
||||
|
||||
#[src_type(GPR)]
|
||||
pub src: Src,
|
||||
|
||||
#[src_type(ALU)]
|
||||
pub shift: Src,
|
||||
|
||||
pub wrap: bool,
|
||||
}
|
||||
|
||||
impl DisplayOp for OpShl {
|
||||
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "shl")?;
|
||||
if self.wrap {
|
||||
write!(f, ".w")?;
|
||||
}
|
||||
write!(f, " {} {}", self.src, self.shift)
|
||||
}
|
||||
}
|
||||
|
||||
/// Only used on SM50
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpShr {
|
||||
pub dst: Dst,
|
||||
|
||||
#[src_type(GPR)]
|
||||
pub src: Src,
|
||||
|
||||
#[src_type(ALU)]
|
||||
pub shift: Src,
|
||||
|
||||
pub wrap: bool,
|
||||
pub signed: bool,
|
||||
}
|
||||
|
||||
impl DisplayOp for OpShr {
|
||||
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "shr")?;
|
||||
if self.wrap {
|
||||
write!(f, ".w")?;
|
||||
}
|
||||
if !self.signed {
|
||||
write!(f, ".u32")?;
|
||||
}
|
||||
write!(f, " {} {}", self.src, self.shift)
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(DstsAsSlice)]
|
||||
pub struct OpF2F {
|
||||
|
|
@ -4511,6 +4565,8 @@ pub enum Op {
|
|||
Lop3(OpLop3),
|
||||
PopC(OpPopC),
|
||||
Shf(OpShf),
|
||||
Shl(OpShl),
|
||||
Shr(OpShr),
|
||||
F2F(OpF2F),
|
||||
F2I(OpF2I),
|
||||
I2F(OpI2F),
|
||||
|
|
@ -4945,7 +5001,9 @@ impl Instr {
|
|||
| Op::ISetP(_)
|
||||
| Op::Lop2(_)
|
||||
| Op::Lop3(_)
|
||||
| Op::Shf(_) => true,
|
||||
| Op::Shf(_)
|
||||
| Op::Shl(_)
|
||||
| Op::Shr(_) => true,
|
||||
|
||||
// Conversions are variable latency?!?
|
||||
Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::FRnd(_) => false,
|
||||
|
|
|
|||
|
|
@ -72,6 +72,16 @@ fn swap_srcs_if_not_reg(x: &mut Src, y: &mut Src) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
fn copy_src_if_i20_overflow(
|
||||
b: &mut impl SSABuilder,
|
||||
src: &mut Src,
|
||||
file: RegFile,
|
||||
) {
|
||||
if src.as_imm_not_i20().is_some() {
|
||||
copy_src(b, src, file);
|
||||
}
|
||||
}
|
||||
|
||||
fn legalize_sm50_instr(
|
||||
b: &mut impl SSABuilder,
|
||||
_bl: &impl BlockLiveness,
|
||||
|
|
@ -83,6 +93,14 @@ fn legalize_sm50_instr(
|
|||
copy_src_if_not_reg(b, &mut op.shift, RegFile::GPR);
|
||||
copy_src_if_not_reg(b, &mut op.high, RegFile::GPR);
|
||||
}
|
||||
Op::Shl(op) => {
|
||||
copy_src_if_not_reg(b, &mut op.src, RegFile::GPR);
|
||||
copy_src_if_i20_overflow(b, &mut op.shift, RegFile::GPR);
|
||||
}
|
||||
Op::Shr(op) => {
|
||||
copy_src_if_not_reg(b, &mut op.src, RegFile::GPR);
|
||||
copy_src_if_i20_overflow(b, &mut op.shift, RegFile::GPR);
|
||||
}
|
||||
Op::FAdd(op) => {
|
||||
let [ref mut src0, ref mut src1] = op.srcs;
|
||||
swap_srcs_if_not_reg(src0, src1);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue