diff --git a/src/nouveau/compiler/nak/encode_sm70.rs b/src/nouveau/compiler/nak/encode_sm70.rs index c8aace7f4fb..00ecf30ac66 100644 --- a/src/nouveau/compiler/nak/encode_sm70.rs +++ b/src/nouveau/compiler/nak/encode_sm70.rs @@ -11,12 +11,14 @@ struct ALURegRef { pub reg: RegRef, pub abs: bool, pub neg: bool, + pub swizzle: SrcSwizzle, } struct ALUCBufRef { pub cb: CBufRef, pub abs: bool, pub neg: bool, + pub swizzle: SrcSwizzle, } enum ALUSrc { @@ -72,6 +74,7 @@ impl ALUSrc { reg: reg, abs: src_mod_has_abs(src.src_mod), neg: src_mod_has_neg(src.src_mod), + swizzle: src.src_swizzle, }; match reg.file() { RegFile::GPR => ALUSrc::Reg(alu_ref), @@ -81,6 +84,7 @@ impl ALUSrc { } SrcRef::Imm32(i) => { assert!(src.src_mod.is_none()); + assert!(src.src_swizzle.is_none()); ALUSrc::Imm32(i) } SrcRef::CBuf(cb) => { @@ -88,6 +92,7 @@ impl ALUSrc { cb: cb, abs: src_mod_has_abs(src.src_mod), neg: src_mod_has_neg(src.src_mod), + swizzle: src.src_swizzle, }; ALUSrc::CBuf(alu_ref) } @@ -265,16 +270,43 @@ impl SM70Instr { self.set_bar_reg(range, *src.src_ref.as_reg().unwrap()); } + fn set_swizzle(&mut self, range: Range, swizzle: SrcSwizzle) { + assert!(range.len() == 2); + + self.set_field( + range, + match swizzle { + SrcSwizzle::None => 0x00_u8, + SrcSwizzle::Xx => 0x02_u8, + SrcSwizzle::Yy => 0x03_u8, + }, + ); + } + fn set_alu_reg( &mut self, range: Range, abs_bit: usize, neg_bit: usize, + swizzle_range: Range, + is_fp16_alu: bool, + has_mod: bool, reg: &ALURegRef, ) { self.set_reg(range, reg.reg); - self.set_bit(abs_bit, reg.abs); - self.set_bit(neg_bit, reg.neg); + + if has_mod { + self.set_bit(abs_bit, reg.abs); + self.set_bit(neg_bit, reg.neg); + } else { + assert!(!reg.abs && !reg.neg); + } + + if is_fp16_alu { + self.set_swizzle(swizzle_range, reg.swizzle); + } else { + assert!(reg.swizzle == SrcSwizzle::None); + } } fn set_alu_ureg( @@ -282,11 +314,25 @@ impl SM70Instr { range: Range, abs_bit: usize, neg_bit: usize, + swizzle_range: Range, + is_fp16_alu: bool, + has_mod: bool, reg: &ALURegRef, ) { self.set_ureg(range, reg.reg); - self.set_bit(abs_bit, reg.abs); - self.set_bit(neg_bit, reg.neg); + + if has_mod { + self.set_bit(abs_bit, reg.abs); + self.set_bit(neg_bit, reg.neg); + } else { + assert!(!reg.abs && !reg.neg); + } + + if is_fp16_alu { + self.set_swizzle(swizzle_range, reg.swizzle); + } else { + assert!(reg.swizzle == SrcSwizzle::None); + } } fn set_alu_cb( @@ -294,11 +340,25 @@ impl SM70Instr { range: Range, abs_bit: usize, neg_bit: usize, + swizzle_range: Range, + is_fp16_alu: bool, + has_mod: bool, cb: &ALUCBufRef, ) { self.set_src_cb(range, &cb.cb); - self.set_bit(abs_bit, cb.abs); - self.set_bit(neg_bit, cb.neg); + + if has_mod { + self.set_bit(abs_bit, cb.abs); + self.set_bit(neg_bit, cb.neg); + } else { + assert!(!cb.abs && !cb.neg); + } + + if is_fp16_alu { + self.set_swizzle(swizzle_range, cb.swizzle); + } else { + assert!(cb.swizzle == SrcSwizzle::None); + } } fn set_alu_reg_src( @@ -306,15 +366,164 @@ impl SM70Instr { range: Range, abs_bit: usize, neg_bit: usize, + swizzle_range: Range, + is_fp16_alu: bool, + has_mod: bool, src: &ALUSrc, ) { match src { ALUSrc::None => (), - ALUSrc::Reg(reg) => self.set_alu_reg(range, abs_bit, neg_bit, reg), - _ => panic!("Invalid ALU src0"), + ALUSrc::Reg(reg) => self.set_alu_reg( + range, + abs_bit, + neg_bit, + swizzle_range, + is_fp16_alu, + has_mod, + reg, + ), + _ => panic!("Invalid ALU src"), } } + fn encode_alu_base( + &mut self, + opcode: u16, + dst: Option, + src0: ALUSrc, + src1: ALUSrc, + src2: ALUSrc, + is_fp16_alu: bool, + ) { + if let Some(dst) = dst { + self.set_dst(dst); + } + + // For opcodes like OpHAdd, both sources support full modifiers and swizzle, + // even when we use a form where the two sources go in src0 and src2. + // For OpHFma, however, which uses both src1 and src2, only src1 supports modifiers. + let src2_has_mod = !is_fp16_alu || matches!(src1, ALUSrc::None); + + self.set_alu_reg_src(24..32, 73, 72, 74..76, is_fp16_alu, true, &src0); + + let form = match &src2 { + ALUSrc::None | ALUSrc::Reg(_) => { + self.set_alu_reg_src( + 64..72, + 74, + 75, + 81..83, + is_fp16_alu, + src2_has_mod, + &src2, + ); + + match &src1 { + ALUSrc::None => 1_u8, // form + ALUSrc::Reg(reg1) => { + self.set_alu_reg( + 32..40, + 62, + 63, + 60..62, + is_fp16_alu, + true, + reg1, + ); + 1_u8 // form + } + ALUSrc::UReg(reg1) => { + self.set_alu_ureg( + 32..40, + 62, + 63, + 60..62, + is_fp16_alu, + true, + reg1, + ); + 6_u8 // form + } + ALUSrc::Imm32(imm) => { + self.set_src_imm(32..64, imm); + 4_u8 // form + } + ALUSrc::CBuf(cb) => { + self.set_alu_cb( + 38..59, + 62, + 63, + 60..62, + is_fp16_alu, + true, + cb, + ); + 5_u8 // form + } + } + } + ALUSrc::UReg(reg2) => { + self.set_alu_ureg( + 32..40, + 62, + 63, + 60..62, + is_fp16_alu, + src2_has_mod, + reg2, + ); + self.set_alu_reg_src( + 64..72, + 74, + 75, + 81..83, + is_fp16_alu, + true, + &src1, + ); + 7_u8 // form + } + ALUSrc::Imm32(imm) => { + self.set_src_imm(32..64, imm); + self.set_alu_reg_src( + 64..72, + 74, + 75, + 81..83, + is_fp16_alu, + true, + &src1, + ); + 2_u8 // form + } + ALUSrc::CBuf(cb) => { + // TODO set_src_cx + self.set_alu_cb( + 38..59, + 62, + 63, + 60..62, + is_fp16_alu, + src2_has_mod, + cb, + ); + self.set_alu_reg_src( + 64..72, + 74, + 75, + 81..83, + is_fp16_alu, + true, + &src1, + ); + 3_u8 // form + } + }; + + self.set_field(0..9, opcode); + self.set_field(9..12, form); + } + fn encode_alu( &mut self, opcode: u16, @@ -323,55 +532,7 @@ impl SM70Instr { src1: ALUSrc, src2: ALUSrc, ) { - if let Some(dst) = dst { - self.set_dst(dst); - } - - self.set_alu_reg_src(24..32, 73, 72, &src0); - - let form = match &src2 { - ALUSrc::None | ALUSrc::Reg(_) => { - self.set_alu_reg_src(64..72, 74, 75, &src2); - match &src1 { - ALUSrc::None => 1_u8, // form - ALUSrc::Reg(reg1) => { - self.set_alu_reg(32..40, 62, 63, reg1); - 1_u8 // form - } - ALUSrc::UReg(reg1) => { - self.set_alu_ureg(32..40, 62, 63, reg1); - 6_u8 // form - } - ALUSrc::Imm32(imm) => { - self.set_src_imm(32..64, imm); - 4_u8 // form - } - ALUSrc::CBuf(cb) => { - self.set_alu_cb(38..59, 62, 63, cb); - 5_u8 // form - } - } - } - ALUSrc::UReg(reg2) => { - self.set_alu_ureg(32..40, 62, 63, reg2); - self.set_alu_reg_src(64..72, 74, 75, &src1); - 7_u8 // form - } - ALUSrc::Imm32(imm) => { - self.set_src_imm(32..64, imm); - self.set_alu_reg_src(64..72, 74, 75, &src1); - 2_u8 // form - } - ALUSrc::CBuf(cb) => { - // TODO set_src_cx - self.set_alu_cb(38..59, 62, 63, cb); - self.set_alu_reg_src(64..72, 74, 75, &src1); - 3_u8 // form - } - }; - - self.set_field(0..9, opcode); - self.set_field(9..12, form); + self.encode_alu_base(opcode, dst, src0, src1, src2, false); } fn set_instr_deps(&mut self, deps: &InstrDeps) { diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index 9a38f6e53ad..fd989406ffa 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -1036,6 +1036,8 @@ pub enum SrcType { SSA, GPR, ALU, + F16, + F16v2, F32, F64, I32, @@ -1044,10 +1046,35 @@ pub enum SrcType { Bar, } +#[derive(Clone, Copy, PartialEq)] +#[allow(dead_code)] +pub enum SrcSwizzle { + None, + Xx, + Yy, +} + +impl SrcSwizzle { + pub fn is_none(&self) -> bool { + matches!(self, SrcSwizzle::None) + } +} + +impl fmt::Display for SrcSwizzle { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SrcSwizzle::None => Ok(()), + SrcSwizzle::Xx => write!(f, ".xx"), + SrcSwizzle::Yy => write!(f, ".yy"), + } + } +} + #[derive(Clone, Copy, PartialEq)] pub struct Src { pub src_ref: SrcRef, pub src_mod: SrcMod, + pub src_swizzle: SrcSwizzle, } impl Src { @@ -1067,6 +1094,7 @@ impl Src { Src { src_ref: self.src_ref, src_mod: self.src_mod.fabs(), + src_swizzle: self.src_swizzle, } } @@ -1074,6 +1102,7 @@ impl Src { Src { src_ref: self.src_ref, src_mod: self.src_mod.fneg(), + src_swizzle: self.src_swizzle, } } @@ -1081,6 +1110,7 @@ impl Src { Src { src_ref: self.src_ref, src_mod: self.src_mod.ineg(), + src_swizzle: self.src_swizzle, } } @@ -1088,6 +1118,7 @@ impl Src { Src { src_ref: self.src_ref, src_mod: self.src_mod.bnot(), + src_swizzle: self.src_swizzle, } } @@ -1096,11 +1127,39 @@ impl Src { return *self; }; - if self.src_mod.is_none() { + if self.src_mod.is_none() && self.src_swizzle.is_none() { return *self; } + assert!(src_type == SrcType::F16v2 || self.src_swizzle.is_none()); + u = match src_type { + SrcType::F16 => { + let low = u & 0xFFFF; + + match self.src_mod { + SrcMod::None => low, + SrcMod::FAbs => low & !(1_u32 << 15), + SrcMod::FNeg => low ^ (1_u32 << 15), + SrcMod::FNegAbs => low | (1_u32 << 15), + _ => panic!("Not a float source modifier"), + } + } + SrcType::F16v2 => { + let u = match self.src_swizzle { + SrcSwizzle::None => u, + SrcSwizzle::Xx => (u << 16) | (u & 0xffff), + SrcSwizzle::Yy => (u & 0xffff0000) | (u >> 16), + }; + + match self.src_mod { + SrcMod::None => u, + SrcMod::FAbs => u & 0x7FFF7FFF, + SrcMod::FNeg => u ^ 0x80008000, + SrcMod::FNegAbs => u | 0x80008000, + _ => panic!("Not a float source modifier"), + } + } SrcType::F32 | SrcType::F64 => match self.src_mod { SrcMod::None => u, SrcMod::FAbs => u & !(1_u32 << 31), @@ -1127,6 +1186,7 @@ impl Src { Src { src_mod: SrcMod::None, src_ref: u.into(), + src_swizzle: SrcSwizzle::None, } } @@ -1233,7 +1293,9 @@ impl Src { pub fn is_fneg_zero(&self, src_type: SrcType) -> bool { match self.fold_imm(src_type).src_ref { + SrcRef::Imm32(0x00008000) => src_type == SrcType::F16, SrcRef::Imm32(0x80000000) => src_type == SrcType::F32, + SrcRef::Imm32(0x80008000) => src_type == SrcType::F16v2, _ => false, } } @@ -1259,7 +1321,7 @@ impl Src { ) } SrcType::ALU => self.src_mod.is_none() && self.src_ref.is_alu(), - SrcType::F32 | SrcType::F64 => { + SrcType::F16 | SrcType::F32 | SrcType::F64 | SrcType::F16v2 => { match self.src_mod { SrcMod::None | SrcMod::FAbs @@ -1304,6 +1366,7 @@ impl> From for Src { Src { src_ref: value.into(), src_mod: SrcMod::None, + src_swizzle: SrcSwizzle::None, } } } @@ -1311,12 +1374,14 @@ impl> From for Src { impl fmt::Display for Src { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.src_mod { - SrcMod::None => write!(f, "{}", self.src_ref), - SrcMod::FAbs => write!(f, "|{}|", self.src_ref), - SrcMod::FNeg => write!(f, "-{}", self.src_ref), - SrcMod::FNegAbs => write!(f, "-|{}|", self.src_ref), - SrcMod::INeg => write!(f, "-{}", self.src_ref), - SrcMod::BNot => write!(f, "!{}", self.src_ref), + SrcMod::None => write!(f, "{}{}", self.src_ref, self.src_swizzle), + SrcMod::FAbs => write!(f, "|{}{}|", self.src_ref, self.src_swizzle), + SrcMod::FNeg => write!(f, "-{}{}", self.src_ref, self.src_swizzle), + SrcMod::FNegAbs => { + write!(f, "-|{}{}|", self.src_ref, self.src_swizzle) + } + SrcMod::INeg => write!(f, "-{}{}", self.src_ref, self.src_swizzle), + SrcMod::BNot => write!(f, "!{}{}", self.src_ref, self.src_swizzle), } } } @@ -3182,7 +3247,7 @@ impl SrcsAsSlice for OpF2F { fn src_types(&self) -> SrcTypeList { let src_type = match self.src_type { - FloatType::F16 => SrcType::ALU, + FloatType::F16 => SrcType::F16, FloatType::F32 => SrcType::F32, FloatType::F64 => SrcType::F64, }; @@ -3232,7 +3297,7 @@ impl SrcsAsSlice for OpF2I { fn src_types(&self) -> SrcTypeList { let src_type = match self.src_type { - FloatType::F16 => SrcType::ALU, + FloatType::F16 => SrcType::F16, FloatType::F32 => SrcType::F32, FloatType::F64 => SrcType::F64, }; @@ -3352,7 +3417,7 @@ impl SrcsAsSlice for OpFRnd { fn src_types(&self) -> SrcTypeList { let src_type = match self.src_type { - FloatType::F16 => SrcType::ALU, + FloatType::F16 => SrcType::F16, FloatType::F32 => SrcType::F32, FloatType::F64 => SrcType::F64, }; diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs index 61fdce3fbae..f6d822c4668 100644 --- a/src/nouveau/compiler/nak/legalize.rs +++ b/src/nouveau/compiler/nak/legalize.rs @@ -48,6 +48,8 @@ fn copy_alu_src(b: &mut impl SSABuilder, src: &mut Src, src_type: SrcType) { SrcType::GPR | SrcType::ALU | SrcType::F32 + | SrcType::F16 + | SrcType::F16v2 | SrcType::I32 | SrcType::B32 => b.alloc_ssa(RegFile::GPR, 1), SrcType::F64 => b.alloc_ssa(RegFile::GPR, 2), @@ -431,6 +433,8 @@ fn legalize_sm50_instr( assert!(src_is_reg(src)); } SrcType::ALU + | SrcType::F16 + | SrcType::F16v2 | SrcType::F32 | SrcType::F64 | SrcType::I32 @@ -721,6 +725,8 @@ fn legalize_sm70_instr( assert!(src_is_reg(src)); } SrcType::ALU + | SrcType::F16 + | SrcType::F16v2 | SrcType::F32 | SrcType::F64 | SrcType::I32 diff --git a/src/nouveau/compiler/nak/opt_copy_prop.rs b/src/nouveau/compiler/nak/opt_copy_prop.rs index 06933934601..58d1f32344e 100644 --- a/src/nouveau/compiler/nak/opt_copy_prop.rs +++ b/src/nouveau/compiler/nak/opt_copy_prop.rs @@ -44,6 +44,7 @@ impl CopyPropPass { let hi32 = Src { src_ref: SrcRef::CBuf(cb.offset(4)), src_mod: src.src_mod, + src_swizzle: src.src_swizzle, }; self.add_copy(dst[0], SrcType::ALU, lo32); self.add_copy(dst[1], SrcType::F64, hi32); @@ -54,6 +55,7 @@ impl CopyPropPass { let hi32 = Src { src_ref: ssa[1].into(), src_mod: src.src_mod, + src_swizzle: src.src_swizzle, }; self.add_copy(dst[0], SrcType::ALU, lo32); self.add_copy(dst[1], SrcType::F64, hi32); @@ -289,6 +291,8 @@ impl CopyPropPass { self.prop_to_gpr_src(src); } SrcType::ALU + | SrcType::F16 + | SrcType::F16v2 | SrcType::F32 | SrcType::I32 | SrcType::B32