nak: Add F16 and F16v2 sources

This also adds swizzle information to Src for F16v2.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27635>
This commit is contained in:
Mary Guillemard 2024-01-30 16:49:47 +01:00 committed by Marge Bot
parent 791c2b604a
commit bad23ddb48
4 changed files with 304 additions and 68 deletions

View file

@ -11,12 +11,14 @@ struct ALURegRef {
pub reg: RegRef,
pub abs: bool,
pub neg: bool,
pub swizzle: SrcSwizzle,
}
/// A constant-buffer ALU source bundled with the per-source state the
/// encoder needs: absolute-value / negate flags and the fp16 swizzle.
struct ALUCBufRef {
/// The constant-buffer reference that is read.
pub cb: CBufRef,
/// True when the source modifier includes an absolute value.
pub abs: bool,
/// True when the source modifier includes a negation.
pub neg: bool,
/// Swizzle copied from the originating `Src`; the `set_alu_cb` encoder
/// asserts it is `SrcSwizzle::None` unless the instruction is an fp16 ALU op.
pub swizzle: SrcSwizzle,
}
enum ALUSrc {
@ -72,6 +74,7 @@ impl ALUSrc {
reg: reg,
abs: src_mod_has_abs(src.src_mod),
neg: src_mod_has_neg(src.src_mod),
swizzle: src.src_swizzle,
};
match reg.file() {
RegFile::GPR => ALUSrc::Reg(alu_ref),
@ -81,6 +84,7 @@ impl ALUSrc {
}
SrcRef::Imm32(i) => {
assert!(src.src_mod.is_none());
assert!(src.src_swizzle.is_none());
ALUSrc::Imm32(i)
}
SrcRef::CBuf(cb) => {
@ -88,6 +92,7 @@ impl ALUSrc {
cb: cb,
abs: src_mod_has_abs(src.src_mod),
neg: src_mod_has_neg(src.src_mod),
swizzle: src.src_swizzle,
};
ALUSrc::CBuf(alu_ref)
}
@ -265,16 +270,43 @@ impl SM70Instr {
self.set_bar_reg(range, *src.src_ref.as_reg().unwrap());
}
/// Writes the encoding of `swizzle` into the two-bit field `range`.
///
/// Panics if `range` is not exactly two bits wide.
fn set_swizzle(&mut self, range: Range<usize>, swizzle: SrcSwizzle) {
    assert!(range.len() == 2);

    // Encoded values: no swizzle = 0, .xx = 2, .yy = 3.
    let encoded: u8 = match swizzle {
        SrcSwizzle::None => 0x00,
        SrcSwizzle::Xx => 0x02,
        SrcSwizzle::Yy => 0x03,
    };
    self.set_field(range, encoded);
}
/// Encodes a register ALU source.
///
/// * `range` - bit range that receives the register index.
/// * `abs_bit` / `neg_bit` - bit positions of the absolute-value and negate
///   modifiers; only written when `has_mod` is true.
/// * `swizzle_range` - two-bit field that receives the swizzle; only written
///   when `is_fp16_alu` is true.
/// * `has_mod` - whether this operand slot supports source modifiers.
/// * `reg` - the register reference with its modifiers and swizzle.
///
/// Panics if `reg` carries a modifier while `has_mod` is false, or a swizzle
/// other than `SrcSwizzle::None` while `is_fp16_alu` is false.
fn set_alu_reg(
    &mut self,
    range: Range<usize>,
    abs_bit: usize,
    neg_bit: usize,
    swizzle_range: Range<usize>,
    is_fp16_alu: bool,
    has_mod: bool,
    reg: &ALURegRef,
) {
    self.set_reg(range, reg.reg);

    // The modifier bits must only be touched when the slot really has them:
    // callers pass bit positions that can belong to another field when
    // `has_mod` is false, so there must be no unconditional write here.
    if has_mod {
        self.set_bit(abs_bit, reg.abs);
        self.set_bit(neg_bit, reg.neg);
    } else {
        assert!(!reg.abs && !reg.neg);
    }

    if is_fp16_alu {
        self.set_swizzle(swizzle_range, reg.swizzle);
    } else {
        assert!(reg.swizzle == SrcSwizzle::None);
    }
}
fn set_alu_ureg(
@ -282,11 +314,25 @@ impl SM70Instr {
range: Range<usize>,
abs_bit: usize,
neg_bit: usize,
swizzle_range: Range<usize>,
is_fp16_alu: bool,
has_mod: bool,
reg: &ALURegRef,
) {
self.set_ureg(range, reg.reg);
self.set_bit(abs_bit, reg.abs);
self.set_bit(neg_bit, reg.neg);
if has_mod {
self.set_bit(abs_bit, reg.abs);
self.set_bit(neg_bit, reg.neg);
} else {
assert!(!reg.abs && !reg.neg);
}
if is_fp16_alu {
self.set_swizzle(swizzle_range, reg.swizzle);
} else {
assert!(reg.swizzle == SrcSwizzle::None);
}
}
fn set_alu_cb(
@ -294,11 +340,25 @@ impl SM70Instr {
range: Range<usize>,
abs_bit: usize,
neg_bit: usize,
swizzle_range: Range<usize>,
is_fp16_alu: bool,
has_mod: bool,
cb: &ALUCBufRef,
) {
self.set_src_cb(range, &cb.cb);
self.set_bit(abs_bit, cb.abs);
self.set_bit(neg_bit, cb.neg);
if has_mod {
self.set_bit(abs_bit, cb.abs);
self.set_bit(neg_bit, cb.neg);
} else {
assert!(!cb.abs && !cb.neg);
}
if is_fp16_alu {
self.set_swizzle(swizzle_range, cb.swizzle);
} else {
assert!(cb.swizzle == SrcSwizzle::None);
}
}
fn set_alu_reg_src(
@ -306,15 +366,164 @@ impl SM70Instr {
range: Range<usize>,
abs_bit: usize,
neg_bit: usize,
swizzle_range: Range<usize>,
is_fp16_alu: bool,
has_mod: bool,
src: &ALUSrc,
) {
match src {
ALUSrc::None => (),
ALUSrc::Reg(reg) => self.set_alu_reg(range, abs_bit, neg_bit, reg),
_ => panic!("Invalid ALU src0"),
ALUSrc::Reg(reg) => self.set_alu_reg(
range,
abs_bit,
neg_bit,
swizzle_range,
is_fp16_alu,
has_mod,
reg,
),
_ => panic!("Invalid ALU src"),
}
}
/// Encodes the fields shared by the ALU instruction forms: the optional
/// destination, up to three sources, the opcode (bits 0..9) and the source
/// "form" selector (bits 9..12).
///
/// `src0` must be `ALUSrc::None` or a register; anything else panics in
/// `set_alu_reg_src`. The kinds of `src1` and `src2` select the form value
/// and decide which bit ranges each operand is written to:
///
/// | src2          | src1   | form |
/// |---------------|--------|------|
/// | None / Reg    | None   | 1    |
/// | None / Reg    | Reg    | 1    |
/// | None / Reg    | UReg   | 6    |
/// | None / Reg    | Imm32  | 4    |
/// | None / Reg    | CBuf   | 5    |
/// | UReg          | None / Reg | 7 |
/// | Imm32         | None / Reg | 2 |
/// | CBuf          | None / Reg | 3 |
///
/// When `is_fp16_alu` is true the per-source swizzle fields are written as
/// well; otherwise the `set_alu_*` helpers assert that register and
/// constant-buffer sources carry `SrcSwizzle::None`.
fn encode_alu_base(
&mut self,
opcode: u16,
dst: Option<Dst>,
src0: ALUSrc,
src1: ALUSrc,
src2: ALUSrc,
is_fp16_alu: bool,
) {
if let Some(dst) = dst {
self.set_dst(dst);
}
// For opcodes like OpHAdd, both sources support full modifiers and swizzle,
// even when we use a form where the two sources go in src0 and src2.
// For OpHFma, however, which uses both src1 and src2, only src1 supports modifiers.
let src2_has_mod = !is_fp16_alu || matches!(src1, ALUSrc::None);
// src0 always lives in bits 24..32 with abs at bit 73 and neg at bit 72.
// NOTE(review): src0's swizzle range 74..76 uses the same bit positions
// (74, 75) that are passed below as the abs/neg bits of the operand in
// 64..72. When `is_fp16_alu` is true and that operand is encoded with
// modifiers enabled, both writes target the same bits — confirm against
// the hardware encoding.
self.set_alu_reg_src(24..32, 73, 72, 74..76, is_fp16_alu, true, &src0);
let form = match &src2 {
// src2 is absent or a plain register: it goes in bits 64..72 and src1
// takes the flexible slot starting at bit 32.
ALUSrc::None | ALUSrc::Reg(_) => {
self.set_alu_reg_src(
64..72,
74,
75,
81..83,
is_fp16_alu,
src2_has_mod,
&src2,
);
match &src1 {
ALUSrc::None => 1_u8, // form
ALUSrc::Reg(reg1) => {
self.set_alu_reg(
32..40,
62,
63,
60..62,
is_fp16_alu,
true,
reg1,
);
1_u8 // form
}
ALUSrc::UReg(reg1) => {
self.set_alu_ureg(
32..40,
62,
63,
60..62,
is_fp16_alu,
true,
reg1,
);
6_u8 // form
}
ALUSrc::Imm32(imm) => {
self.set_src_imm(32..64, imm);
4_u8 // form
}
ALUSrc::CBuf(cb) => {
self.set_alu_cb(
38..59,
62,
63,
60..62,
is_fp16_alu,
true,
cb,
);
5_u8 // form
}
}
}
// In the remaining forms src2 is not a plain register, so src2 takes the
// flexible slot starting at bit 32 and src1 (which must then be None or a
// register) is written to bits 64..72 instead.
ALUSrc::UReg(reg2) => {
self.set_alu_ureg(
32..40,
62,
63,
60..62,
is_fp16_alu,
src2_has_mod,
reg2,
);
self.set_alu_reg_src(
64..72,
74,
75,
81..83,
is_fp16_alu,
true,
&src1,
);
7_u8 // form
}
ALUSrc::Imm32(imm) => {
self.set_src_imm(32..64, imm);
self.set_alu_reg_src(
64..72,
74,
75,
81..83,
is_fp16_alu,
true,
&src1,
);
2_u8 // form
}
ALUSrc::CBuf(cb) => {
// TODO set_src_cx
self.set_alu_cb(
38..59,
62,
63,
60..62,
is_fp16_alu,
src2_has_mod,
cb,
);
self.set_alu_reg_src(
64..72,
74,
75,
81..83,
is_fp16_alu,
true,
&src1,
);
3_u8 // form
}
};
// The opcode and the form selector are written last.
self.set_field(0..9, opcode);
self.set_field(9..12, form);
}
fn encode_alu(
&mut self,
opcode: u16,
@ -323,55 +532,7 @@ impl SM70Instr {
src1: ALUSrc,
src2: ALUSrc,
) {
if let Some(dst) = dst {
self.set_dst(dst);
}
self.set_alu_reg_src(24..32, 73, 72, &src0);
let form = match &src2 {
ALUSrc::None | ALUSrc::Reg(_) => {
self.set_alu_reg_src(64..72, 74, 75, &src2);
match &src1 {
ALUSrc::None => 1_u8, // form
ALUSrc::Reg(reg1) => {
self.set_alu_reg(32..40, 62, 63, reg1);
1_u8 // form
}
ALUSrc::UReg(reg1) => {
self.set_alu_ureg(32..40, 62, 63, reg1);
6_u8 // form
}
ALUSrc::Imm32(imm) => {
self.set_src_imm(32..64, imm);
4_u8 // form
}
ALUSrc::CBuf(cb) => {
self.set_alu_cb(38..59, 62, 63, cb);
5_u8 // form
}
}
}
ALUSrc::UReg(reg2) => {
self.set_alu_ureg(32..40, 62, 63, reg2);
self.set_alu_reg_src(64..72, 74, 75, &src1);
7_u8 // form
}
ALUSrc::Imm32(imm) => {
self.set_src_imm(32..64, imm);
self.set_alu_reg_src(64..72, 74, 75, &src1);
2_u8 // form
}
ALUSrc::CBuf(cb) => {
// TODO set_src_cx
self.set_alu_cb(38..59, 62, 63, cb);
self.set_alu_reg_src(64..72, 74, 75, &src1);
3_u8 // form
}
};
self.set_field(0..9, opcode);
self.set_field(9..12, form);
self.encode_alu_base(opcode, dst, src0, src1, src2, false);
}
fn set_instr_deps(&mut self, deps: &InstrDeps) {

View file

@ -1036,6 +1036,8 @@ pub enum SrcType {
SSA,
GPR,
ALU,
F16,
F16v2,
F32,
F64,
I32,
@ -1044,10 +1046,35 @@ pub enum SrcType {
Bar,
}
/// Optional half-selection applied to a packed 2x16-bit source.
///
/// When folded into an immediate, `Xx` replicates the low 16 bits into both
/// halves and `Yy` replicates the high 16 bits; `None` leaves the value as is.
#[derive(Clone, Copy, PartialEq)]
#[allow(dead_code)]
pub enum SrcSwizzle {
    None,
    Xx,
    Yy,
}

impl SrcSwizzle {
    /// Returns true when no swizzle is applied.
    pub fn is_none(&self) -> bool {
        *self == Self::None
    }
}

impl fmt::Display for SrcSwizzle {
    /// Prints the swizzle as a suffix (`.xx` / `.yy`); `None` prints nothing.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::None => return Ok(()),
            Self::Xx => ".xx",
            Self::Yy => ".yy",
        };
        f.write_str(suffix)
    }
}
/// An instruction source operand: what is read, the modifier applied to it,
/// and an optional swizzle.
#[derive(Clone, Copy, PartialEq)]
pub struct Src {
/// The value being referenced (for example an immediate, a constant-buffer
/// slot or a register).
pub src_ref: SrcRef,
/// Modifier applied to the referenced value (float abs/neg, integer
/// negate, bitwise not, or none).
pub src_mod: SrcMod,
/// Swizzle applied to the referenced value. Immediate folding asserts this
/// is `SrcSwizzle::None` for every source type other than `SrcType::F16v2`.
pub src_swizzle: SrcSwizzle,
}
impl Src {
@ -1067,6 +1094,7 @@ impl Src {
Src {
src_ref: self.src_ref,
src_mod: self.src_mod.fabs(),
src_swizzle: self.src_swizzle,
}
}
@ -1074,6 +1102,7 @@ impl Src {
Src {
src_ref: self.src_ref,
src_mod: self.src_mod.fneg(),
src_swizzle: self.src_swizzle,
}
}
@ -1081,6 +1110,7 @@ impl Src {
Src {
src_ref: self.src_ref,
src_mod: self.src_mod.ineg(),
src_swizzle: self.src_swizzle,
}
}
@ -1088,6 +1118,7 @@ impl Src {
Src {
src_ref: self.src_ref,
src_mod: self.src_mod.bnot(),
src_swizzle: self.src_swizzle,
}
}
@ -1096,11 +1127,39 @@ impl Src {
return *self;
};
if self.src_mod.is_none() {
if self.src_mod.is_none() && self.src_swizzle.is_none() {
return *self;
}
assert!(src_type == SrcType::F16v2 || self.src_swizzle.is_none());
u = match src_type {
SrcType::F16 => {
let low = u & 0xFFFF;
match self.src_mod {
SrcMod::None => low,
SrcMod::FAbs => low & !(1_u32 << 15),
SrcMod::FNeg => low ^ (1_u32 << 15),
SrcMod::FNegAbs => low | (1_u32 << 15),
_ => panic!("Not a float source modifier"),
}
}
SrcType::F16v2 => {
let u = match self.src_swizzle {
SrcSwizzle::None => u,
SrcSwizzle::Xx => (u << 16) | (u & 0xffff),
SrcSwizzle::Yy => (u & 0xffff0000) | (u >> 16),
};
match self.src_mod {
SrcMod::None => u,
SrcMod::FAbs => u & 0x7FFF7FFF,
SrcMod::FNeg => u ^ 0x80008000,
SrcMod::FNegAbs => u | 0x80008000,
_ => panic!("Not a float source modifier"),
}
}
SrcType::F32 | SrcType::F64 => match self.src_mod {
SrcMod::None => u,
SrcMod::FAbs => u & !(1_u32 << 31),
@ -1127,6 +1186,7 @@ impl Src {
Src {
src_mod: SrcMod::None,
src_ref: u.into(),
src_swizzle: SrcSwizzle::None,
}
}
@ -1233,7 +1293,9 @@ impl Src {
/// Returns true when this source, after immediate folding for `src_type`,
/// is the negative-zero bit pattern of that floating-point type.
///
/// Only `F16` (0x00008000), `F32` (0x80000000) and `F16v2` (0x80008000) are
/// recognised; every other source type or non-immediate yields false.
pub fn is_fneg_zero(&self, src_type: SrcType) -> bool {
    let folded = self.fold_imm(src_type);
    if let SrcRef::Imm32(bits) = folded.src_ref {
        match bits {
            0x00008000 => src_type == SrcType::F16,
            0x80000000 => src_type == SrcType::F32,
            0x80008000 => src_type == SrcType::F16v2,
            _ => false,
        }
    } else {
        false
    }
}
@ -1259,7 +1321,7 @@ impl Src {
)
}
SrcType::ALU => self.src_mod.is_none() && self.src_ref.is_alu(),
SrcType::F32 | SrcType::F64 => {
SrcType::F16 | SrcType::F32 | SrcType::F64 | SrcType::F16v2 => {
match self.src_mod {
SrcMod::None
| SrcMod::FAbs
@ -1304,6 +1366,7 @@ impl<T: Into<SrcRef>> From<T> for Src {
Src {
src_ref: value.into(),
src_mod: SrcMod::None,
src_swizzle: SrcSwizzle::None,
}
}
}
@ -1311,12 +1374,14 @@ impl<T: Into<SrcRef>> From<T> for Src {
impl fmt::Display for Src {
    /// Prints the source reference followed by its swizzle suffix, wrapped in
    /// the notation of its modifier: `|x|` for abs, `-x` for float or integer
    /// negate, `-|x|` for negated abs and `!x` for bitwise not.
    ///
    /// Each modifier has exactly one arm. A second, swizzle-less arm for the
    /// same variant placed earlier in the match would shadow the one below
    /// and the swizzle would never be printed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.src_mod {
            SrcMod::None => write!(f, "{}{}", self.src_ref, self.src_swizzle),
            SrcMod::FAbs => write!(f, "|{}{}|", self.src_ref, self.src_swizzle),
            SrcMod::FNeg => write!(f, "-{}{}", self.src_ref, self.src_swizzle),
            SrcMod::FNegAbs => {
                write!(f, "-|{}{}|", self.src_ref, self.src_swizzle)
            }
            SrcMod::INeg => write!(f, "-{}{}", self.src_ref, self.src_swizzle),
            SrcMod::BNot => write!(f, "!{}{}", self.src_ref, self.src_swizzle),
        }
    }
}
@ -3182,7 +3247,7 @@ impl SrcsAsSlice for OpF2F {
fn src_types(&self) -> SrcTypeList {
let src_type = match self.src_type {
FloatType::F16 => SrcType::ALU,
FloatType::F16 => SrcType::F16,
FloatType::F32 => SrcType::F32,
FloatType::F64 => SrcType::F64,
};
@ -3232,7 +3297,7 @@ impl SrcsAsSlice for OpF2I {
fn src_types(&self) -> SrcTypeList {
let src_type = match self.src_type {
FloatType::F16 => SrcType::ALU,
FloatType::F16 => SrcType::F16,
FloatType::F32 => SrcType::F32,
FloatType::F64 => SrcType::F64,
};
@ -3352,7 +3417,7 @@ impl SrcsAsSlice for OpFRnd {
fn src_types(&self) -> SrcTypeList {
let src_type = match self.src_type {
FloatType::F16 => SrcType::ALU,
FloatType::F16 => SrcType::F16,
FloatType::F32 => SrcType::F32,
FloatType::F64 => SrcType::F64,
};

View file

@ -48,6 +48,8 @@ fn copy_alu_src(b: &mut impl SSABuilder, src: &mut Src, src_type: SrcType) {
SrcType::GPR
| SrcType::ALU
| SrcType::F32
| SrcType::F16
| SrcType::F16v2
| SrcType::I32
| SrcType::B32 => b.alloc_ssa(RegFile::GPR, 1),
SrcType::F64 => b.alloc_ssa(RegFile::GPR, 2),
@ -431,6 +433,8 @@ fn legalize_sm50_instr(
assert!(src_is_reg(src));
}
SrcType::ALU
| SrcType::F16
| SrcType::F16v2
| SrcType::F32
| SrcType::F64
| SrcType::I32
@ -721,6 +725,8 @@ fn legalize_sm70_instr(
assert!(src_is_reg(src));
}
SrcType::ALU
| SrcType::F16
| SrcType::F16v2
| SrcType::F32
| SrcType::F64
| SrcType::I32

View file

@ -44,6 +44,7 @@ impl CopyPropPass {
let hi32 = Src {
src_ref: SrcRef::CBuf(cb.offset(4)),
src_mod: src.src_mod,
src_swizzle: src.src_swizzle,
};
self.add_copy(dst[0], SrcType::ALU, lo32);
self.add_copy(dst[1], SrcType::F64, hi32);
@ -54,6 +55,7 @@ impl CopyPropPass {
let hi32 = Src {
src_ref: ssa[1].into(),
src_mod: src.src_mod,
src_swizzle: src.src_swizzle,
};
self.add_copy(dst[0], SrcType::ALU, lo32);
self.add_copy(dst[1], SrcType::F64, hi32);
@ -289,6 +291,8 @@ impl CopyPropPass {
self.prop_to_gpr_src(src);
}
SrcType::ALU
| SrcType::F16
| SrcType::F16v2
| SrcType::F32
| SrcType::I32
| SrcType::B32