From e039b7e1ac3dc7666b0d83ed621b5c5c279a760a Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 10 Jul 2024 15:00:20 -0500 Subject: [PATCH] nak/sm50: Move legalization into SM50Op Part-of: --- src/nouveau/compiler/nak/legalize.rs | 282 +--------------- src/nouveau/compiler/nak/sm50.rs | 463 +++++++++++++++++++++++++++ 2 files changed, 464 insertions(+), 281 deletions(-) diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs index b2dc88df149..92083163ccb 100644 --- a/src/nouveau/compiler/nak/legalize.rs +++ b/src/nouveau/compiler/nak/legalize.rs @@ -4,6 +4,7 @@ use crate::api::{GetDebugFlags, DEBUG}; use crate::ir::*; use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness}; +use crate::sm50::legalize_sm50_instr; use crate::sm70::legalize_sm70_instr; use std::collections::{HashMap, HashSet}; @@ -269,34 +270,6 @@ pub trait LegalizeBuildHelpers: SSABuilder { } } - fn copy_alu_src_if_fabs(&mut self, src: &mut Src, src_type: SrcType) { - if src.src_mod.has_fabs() { - self.copy_alu_src_and_lower_fmod(src, src_type); - } - } - - fn copy_alu_src_if_i20_overflow( - &mut self, - src: &mut Src, - reg_file: RegFile, - src_type: SrcType, - ) { - if src.as_imm_not_i20().is_some() { - self.copy_alu_src(src, reg_file, src_type); - } - } - - fn copy_alu_src_if_f20_overflow( - &mut self, - src: &mut Src, - reg_file: RegFile, - src_type: SrcType, - ) { - if src.as_imm_not_f20().is_some() { - self.copy_alu_src(src, reg_file, src_type); - } - } - fn copy_ssa_ref_if_uniform(&mut self, ssa_ref: &mut SSARef) { for ssa in &mut ssa_ref[..] { if ssa.is_uniform() { @@ -310,259 +283,6 @@ pub trait LegalizeBuildHelpers: SSABuilder { impl LegalizeBuildHelpers for LegalizeBuilder<'_> {} -fn legalize_sm50_instr(b: &mut LegalizeBuilder, instr: &mut Instr) { - debug_assert!(!instr.is_uniform()); - - use RegFile::GPR; - - match &mut instr.op { - Op::Shf(op) => { - b.copy_alu_src_if_not_reg(&mut op.shift, GPR, SrcType::GPR); - b.copy_alu_src_if_not_reg(&mut op.high, GPR, SrcType::ALU); - b.copy_alu_src_if_not_reg(&mut op.low, GPR, SrcType::GPR); - b.copy_alu_src_if_i20_overflow(&mut op.shift, GPR, SrcType::GPR); - } - Op::Shl(op) => { - b.copy_alu_src_if_not_reg(&mut op.src, GPR, SrcType::GPR); - b.copy_alu_src_if_i20_overflow(&mut op.shift, GPR, SrcType::ALU); - } - Op::Shr(op) => { - b.copy_alu_src_if_not_reg(&mut op.src, GPR, SrcType::GPR); - b.copy_alu_src_if_i20_overflow(&mut op.shift, GPR, SrcType::ALU); - } - Op::FAdd(op) => { - let [ref mut src0, ref mut src1] = op.srcs; - swap_srcs_if_not_reg(src0, src1, GPR); - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); - } - Op::FMul(op) => { - let [ref mut src0, ref mut src1] = op.srcs; - swap_srcs_if_not_reg(src0, src1, GPR); - b.copy_alu_src_if_not_reg(&mut op.srcs[0], GPR, SrcType::F32); - } - Op::FSet(op) => { - let [ref mut src0, ref mut src1] = op.srcs; - if swap_srcs_if_not_reg(src0, src1, GPR) { - op.cmp_op = op.cmp_op.flip(); - } - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); - b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32); - } - Op::FSetP(op) => { - let [ref mut src0, ref mut src1] = op.srcs; - if swap_srcs_if_not_reg(src0, src1, GPR) { - op.cmp_op = op.cmp_op.flip(); - } - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); - b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32); - } - Op::FSwzAdd(op) => { - b.copy_alu_src_if_not_reg(&mut op.srcs[0], GPR, SrcType::GPR); - b.copy_alu_src_if_not_reg(&mut op.srcs[1], GPR, SrcType::GPR); - } - Op::ISetP(op) => { - let [ref mut src0, ref mut src1] = op.srcs; - if swap_srcs_if_not_reg(src0, src1, GPR) { - op.cmp_op = op.cmp_op.flip(); - } - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); - b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU); - } - Op::Lop2(op) => { - let [ref mut src0, ref mut src1] = op.srcs; - swap_srcs_if_not_reg(src0, src1, GPR); - b.copy_alu_src_if_not_reg(&mut op.srcs[0], GPR, SrcType::ALU); - } - Op::Rro(op) => { - b.copy_alu_src_if_f20_overflow(&mut op.src, GPR, SrcType::F32); - } - Op::PSetP(_) => {} - Op::MuFu(op) => { - b.copy_alu_src_if_not_reg(&mut op.src, GPR, SrcType::GPR); - } - Op::DAdd(op) => { - let [ref mut src0, ref mut src1] = op.srcs; - swap_srcs_if_not_reg(src0, src1, GPR); - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); - b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); - } - Op::DFma(op) => { - let [ref mut src0, ref mut src1, ref mut src2] = op.srcs; - b.copy_alu_src_if_fabs(src0, SrcType::F64); - b.copy_alu_src_if_fabs(src1, SrcType::F64); - b.copy_alu_src_if_fabs(src2, SrcType::F64); - swap_srcs_if_not_reg(src0, src1, GPR); - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); - b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); - if src_is_reg(src1, GPR) { - b.copy_alu_src_if_imm(src2, GPR, SrcType::F64); - } else { - b.copy_alu_src_if_not_reg(src2, GPR, SrcType::F64); - } - } - Op::DMnMx(op) => { - let [ref mut src0, ref mut src1] = op.srcs; - swap_srcs_if_not_reg(src0, src1, GPR); - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); - b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); - } - Op::DMul(op) => { - let [ref mut src0, ref mut src1] = op.srcs; - b.copy_alu_src_if_fabs(src0, SrcType::F64); - b.copy_alu_src_if_fabs(src1, SrcType::F64); - swap_srcs_if_not_reg(src0, src1, GPR); - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); - b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); - } - Op::DSetP(op) => { - let [ref mut src0, ref mut src1] = op.srcs; - if swap_srcs_if_not_reg(src0, src1, GPR) { - op.cmp_op = op.cmp_op.flip(); - } - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); - b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); - } - Op::Sel(op) => { - let [ref mut src0, ref mut src1] = op.srcs; - if swap_srcs_if_not_reg(src0, src1, GPR) { - op.cond = op.cond.bnot(); - } - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); - b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU); - } - Op::Shfl(op) => { - b.copy_alu_src_if_not_reg(&mut op.src, GPR, SrcType::GPR); - b.copy_alu_src_if_not_reg_or_imm(&mut op.lane, GPR, SrcType::ALU); - b.copy_alu_src_if_not_reg_or_imm(&mut op.c, GPR, SrcType::ALU); - } - Op::Vote(_) => {} - Op::IAdd2(op) => { - let [ref mut src0, ref mut src1] = op.srcs; - swap_srcs_if_not_reg(src0, src1, GPR); - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::I32); - } - Op::I2F(op) => { - b.copy_alu_src_if_i20_overflow(&mut op.src, GPR, SrcType::ALU); - } - Op::F2F(op) => { - b.copy_alu_src_if_f20_overflow(&mut op.src, GPR, SrcType::ALU); - } - Op::I2I(op) => { - b.copy_alu_src_if_i20_overflow(&mut op.src, GPR, SrcType::ALU); - } - Op::IMad(op) => { - let [ref mut src0, ref mut src1, ref mut src2] = op.srcs; - swap_srcs_if_not_reg(src0, src1, GPR); - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); - if src_is_reg(src1, GPR) { - b.copy_alu_src_if_imm(src2, GPR, SrcType::ALU); - } else { - b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU); - b.copy_alu_src_if_not_reg(src2, GPR, SrcType::ALU); - } - } - Op::IMul(op) => { - let [ref mut src0, ref mut src1] = op.srcs; - if swap_srcs_if_not_reg(src0, src1, GPR) { - op.signed.swap(0, 1); - } - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); - } - Op::F2I(op) => { - b.copy_alu_src_if_f20_overflow(&mut op.src, GPR, SrcType::ALU); - } - Op::IMnMx(op) => { - let [ref mut src0, ref mut src1] = op.srcs; - swap_srcs_if_not_reg(src0, src1, GPR); - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); - } - Op::Ipa(op) => { - b.copy_alu_src_if_not_reg(&mut op.offset, GPR, SrcType::GPR); - b.copy_alu_src_if_not_reg(&mut op.inv_w, GPR, SrcType::GPR); - } - Op::PopC(_) => {} - Op::BRev(op) => { - b.copy_alu_src_if_not_reg(&mut op.src, GPR, SrcType::ALU); - } - Op::Flo(op) => { - b.copy_alu_src_if_i20_overflow(&mut op.src, GPR, SrcType::ALU); - } - Op::FMnMx(op) => { - let [ref mut src0, ref mut src1] = op.srcs; - swap_srcs_if_not_reg(src0, src1, GPR); - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); - b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32); - } - Op::Prmt(op) => { - b.copy_alu_src_if_not_reg(&mut op.srcs[0], GPR, SrcType::GPR); - b.copy_alu_src_if_not_reg(&mut op.srcs[1], GPR, SrcType::GPR); - b.copy_alu_src_if_i20_overflow(&mut op.sel, GPR, SrcType::ALU); - } - Op::FFma(op) => { - let [ref mut src0, ref mut src1, ref mut src2] = op.srcs; - b.copy_alu_src_if_fabs(src0, SrcType::F32); - b.copy_alu_src_if_fabs(src1, SrcType::F32); - b.copy_alu_src_if_fabs(src2, SrcType::F32); - swap_srcs_if_not_reg(src0, src1, GPR); - b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); - b.copy_alu_src_if_not_reg(src2, GPR, SrcType::F32); - b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32); - } - Op::Ldc(op) => { - // TODO: cb must be a bound constant buffer - b.copy_alu_src_if_not_reg(&mut op.offset, GPR, SrcType::GPR); - } - Op::SuLd(op) => { - b.copy_alu_src_if_not_reg(&mut op.handle, GPR, SrcType::GPR); - b.copy_alu_src_if_not_reg(&mut op.coord, GPR, SrcType::GPR); - } - Op::SuAtom(op) => { - b.copy_alu_src_if_not_reg(&mut op.coord, GPR, SrcType::GPR); - b.copy_alu_src_if_not_reg(&mut op.handle, GPR, SrcType::GPR); - b.copy_alu_src_if_not_reg(&mut op.data, GPR, SrcType::GPR); - } - Op::Out(op) => { - b.copy_alu_src_if_not_reg(&mut op.handle, GPR, SrcType::GPR); - b.copy_alu_src_if_i20_overflow(&mut op.stream, GPR, SrcType::ALU); - } - Op::Bfe(op) => { - b.copy_alu_src_if_not_reg(&mut op.base, GPR, SrcType::ALU); - } - _ => { - let src_types = instr.src_types(); - for (i, src) in instr.srcs_mut().iter_mut().enumerate() { - match src_types[i] { - SrcType::SSA => { - assert!(src.as_ssa().is_some()); - } - SrcType::GPR => { - assert!(src_is_reg(src, GPR)); - } - SrcType::ALU - | SrcType::F16 - | SrcType::F16v2 - | SrcType::F32 - | SrcType::F64 - | SrcType::I32 - | SrcType::B32 => { - panic!( - "({}): ALU srcs must be legalized explicitly", - &mut instr.op - ); - } - SrcType::Pred => { - panic!( - "({}): Predicates must be legalized explicitly", - &mut instr.op - ); - } - SrcType::Bar => panic!("Barrier regs are Volta+"), - } - } - } - } -} - fn legalize_instr( b: &mut LegalizeBuilder, bl: &impl BlockLiveness, diff --git a/src/nouveau/compiler/nak/sm50.rs b/src/nouveau/compiler/nak/sm50.rs index b735d5033cd..3a7c4a74465 100644 --- a/src/nouveau/compiler/nak/sm50.rs +++ b/src/nouveau/compiler/nak/sm50.rs @@ -2,6 +2,9 @@ // SPDX-License-Identifier: MIT use crate::ir::*; +use crate::legalize::{ + src_is_reg, swap_srcs_if_not_reg, LegalizeBuildHelpers, LegalizeBuilder, +}; use bitview::*; use std::collections::HashMap; @@ -51,6 +54,7 @@ impl Src { } trait SM50Op { + fn legalize(&mut self, b: &mut LegalizeBuilder); fn encode(&self, e: &mut SM50Encoder<'_>); } @@ -272,7 +276,85 @@ impl SM50Encoder<'_> { } } +// +// Legalization helpers +// + +pub trait SM50LegalizeBuildHelpers: LegalizeBuildHelpers { + fn copy_alu_src_if_fabs(&mut self, src: &mut Src, src_type: SrcType) { + if src.src_mod.has_fabs() { + self.copy_alu_src_and_lower_fmod(src, src_type); + } + } + + fn copy_alu_src_if_i20_overflow( + &mut self, + src: &mut Src, + reg_file: RegFile, + src_type: SrcType, + ) { + if src.as_imm_not_i20().is_some() { + self.copy_alu_src(src, reg_file, src_type); + } + } + + fn copy_alu_src_if_f20_overflow( + &mut self, + src: &mut Src, + reg_file: RegFile, + src_type: SrcType, + ) { + if src.as_imm_not_f20().is_some() { + self.copy_alu_src(src, reg_file, src_type); + } + } +} + +impl SM50LegalizeBuildHelpers for LegalizeBuilder<'_> {} + +/// Helper to legalize extended or external instructions +/// +/// These are instructions which reach out external units such as load/store +/// and texture ops. They typically can't take anything but GPRs and are the +/// only types of instructions that support vectors. +/// +fn legalize_ext_instr(op: &mut impl SrcsAsSlice, _b: &mut LegalizeBuilder) { + let src_types = op.src_types(); + for (i, src) in op.srcs_as_mut_slice().iter_mut().enumerate() { + match src_types[i] { + SrcType::SSA => { + assert!(src.as_ssa().is_some()); + } + SrcType::GPR => { + assert!(src_is_reg(src, RegFile::GPR)); + } + SrcType::ALU + | SrcType::F16 + | SrcType::F16v2 + | SrcType::F32 + | SrcType::F64 + | SrcType::I32 + | SrcType::B32 => { + panic!("ALU srcs must be legalized explicitly"); + } + SrcType::Pred => { + panic!("Predicates must be legalized explicitly"); + } + SrcType::Bar => panic!("Barrier regs are Volta+"), + } + } +} + +// +// Implementations of SM50Op for each op we support on Maxwell/Pascal +// + impl SM50Op for OpMov { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_i20_overflow(&mut self.src, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.src.src_ref { SrcRef::Zero | SrcRef::Reg(_) => { @@ -298,6 +380,16 @@ impl SM50Op for OpMov { } impl SM50Op for OpSel { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1] = &mut self.srcs; + if swap_srcs_if_not_reg(src0, src1, GPR) { + self.cond = self.cond.bnot(); + } + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); + b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.srcs[1].src_ref { SrcRef::Imm32(imm32) => { @@ -322,6 +414,13 @@ impl SM50Op for OpSel { } impl SM50Op for OpShfl { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg_or_imm(&mut self.lane, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg_or_imm(&mut self.c, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xef10); @@ -365,6 +464,10 @@ impl SM50Op for OpShfl { } impl SM50Op for OpVote { + fn legalize(&mut self, _b: &mut LegalizeBuilder) { + // Nothing to do + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0x50d8); @@ -384,6 +487,10 @@ impl SM50Op for OpVote { } impl SM50Op for OpPSetP { + fn legalize(&mut self, _b: &mut LegalizeBuilder) { + // Nothing to do + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0x5090); @@ -462,6 +569,10 @@ impl SM50Encoder<'_> { } impl SM50Op for OpLd { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + legalize_ext_instr(self, b); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(match self.access.space { MemSpace::Global(_) => 0xeed0, @@ -478,6 +589,11 @@ impl SM50Op for OpLd { } impl SM50Op for OpLdc { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_not_reg(&mut self.offset, GPR, SrcType::GPR); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { assert!(self.cb.src_mod.is_none()); let SrcRef::CBuf(cb) = &self.cb.src_ref else { @@ -507,6 +623,10 @@ impl SM50Op for OpLdc { } impl SM50Op for OpSt { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + legalize_ext_instr(self, b); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(match self.access.space { MemSpace::Global(_) => 0xeed8, @@ -522,6 +642,13 @@ impl SM50Op for OpSt { } impl SM50Op for OpLop2 { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1] = &mut self.srcs; + swap_srcs_if_not_reg(src0, src1, GPR); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { if let Some(imm32) = self.srcs[1].as_imm_not_i20() { e.set_opcode(0x0400); @@ -581,6 +708,14 @@ impl SM50Op for OpLop2 { } impl SM50Op for OpShf { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_not_reg(&mut self.shift, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut self.high, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(&mut self.low, GPR, SrcType::GPR); + b.copy_alu_src_if_i20_overflow(&mut self.shift, GPR, SrcType::GPR); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.shift.src_ref { SrcRef::Zero | SrcRef::Reg(_) => { @@ -618,6 +753,12 @@ impl SM50Op for OpShf { } impl SM50Op for OpShl { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR); + b.copy_alu_src_if_i20_overflow(&mut self.shift, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_dst(self.dst); e.set_reg_src(8..16, self.src); @@ -642,6 +783,12 @@ impl SM50Op for OpShl { } impl SM50Op for OpShr { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR); + b.copy_alu_src_if_i20_overflow(&mut self.shift, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_dst(self.dst); e.set_reg_src(8..16, self.src); @@ -667,6 +814,11 @@ impl SM50Op for OpShr { } impl SM50Op for OpI2F { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_i20_overflow(&mut self.src, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { let abs_bit = 49; let neg_bit = 45; @@ -698,6 +850,11 @@ impl SM50Op for OpI2F { } impl SM50Op for OpF2F { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_f20_overflow(&mut self.src, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { assert!(self.src.is_reg_or_zero()); @@ -732,6 +889,11 @@ impl SM50Op for OpF2F { } impl SM50Op for OpI2I { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_i20_overflow(&mut self.src, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.src.src_ref { SrcRef::Imm32(imm32) => { @@ -764,6 +926,19 @@ impl SM50Op for OpI2I { } impl SM50Op for OpIMad { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1, src2] = &mut self.srcs; + swap_srcs_if_not_reg(src0, src1, GPR); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); + if src_is_reg(src1, GPR) { + b.copy_alu_src_if_imm(src2, GPR, SrcType::ALU); + } else { + b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(src2, GPR, SrcType::ALU); + } + } + fn encode(&self, e: &mut SM50Encoder<'_>) { let neg_1_bit = 51; let neg_2_bit = 52; @@ -809,6 +984,15 @@ impl SM50Op for OpIMad { } impl SM50Op for OpIMul { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1] = &mut self.srcs; + if swap_srcs_if_not_reg(src0, src1, GPR) { + self.signed.swap(0, 1); + } + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { assert!(self.srcs[0].src_mod.is_none()); assert!(self.srcs[1].src_mod.is_none()); @@ -848,6 +1032,11 @@ impl SM50Op for OpIMul { } impl SM50Op for OpF2I { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_f20_overflow(&mut self.src, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.src.src_ref { SrcRef::Zero | SrcRef::Reg(_) => { @@ -877,6 +1066,13 @@ impl SM50Op for OpF2I { } impl SM50Op for OpIMnMx { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1] = &mut self.srcs; + swap_srcs_if_not_reg(src0, src1, GPR); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.srcs[1].src_ref { SrcRef::Zero | SrcRef::Reg(_) => { @@ -938,6 +1134,16 @@ impl SM50Encoder<'_> { } impl SM50Op for OpISetP { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1] = &mut self.srcs; + if swap_srcs_if_not_reg(src0, src1, GPR) { + self.cmp_op = self.cmp_op.flip(); + } + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); + b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { assert!(self.srcs[0].src_mod.is_none()); assert!(self.srcs[1].src_mod.is_none()); @@ -978,6 +1184,10 @@ impl SM50Op for OpISetP { } impl SM50Op for OpSuSt { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + legalize_ext_instr(self, b); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xeb20); @@ -1015,6 +1225,10 @@ impl SM50Encoder<'_> { } impl SM50Op for OpAtom { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + legalize_ext_instr(self, b); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match self.mem_space { MemSpace::Global(_) => { @@ -1107,6 +1321,10 @@ impl SM50Encoder<'_> { } impl SM50Op for OpTex { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + legalize_ext_instr(self, b); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xdeb8); @@ -1127,6 +1345,10 @@ impl SM50Op for OpTex { } impl SM50Op for OpTld { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + legalize_ext_instr(self, b); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xdd38); @@ -1151,6 +1373,10 @@ impl SM50Op for OpTld { } impl SM50Op for OpTld4 { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + legalize_ext_instr(self, b); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xdef8); @@ -1178,6 +1404,10 @@ impl SM50Op for OpTld4 { } impl SM50Op for OpTmml { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + legalize_ext_instr(self, b); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xdf60); @@ -1194,6 +1424,10 @@ impl SM50Op for OpTmml { } impl SM50Op for OpTxd { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + legalize_ext_instr(self, b); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xde78); @@ -1211,6 +1445,10 @@ impl SM50Op for OpTxd { } impl SM50Op for OpTxq { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + legalize_ext_instr(self, b); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xdf50); @@ -1236,6 +1474,12 @@ impl SM50Op for OpTxq { } impl SM50Op for OpIpa { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_not_reg(&mut self.offset, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut self.inv_w, GPR, SrcType::GPR); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xe000); @@ -1270,6 +1514,10 @@ impl SM50Op for OpIpa { } impl SM50Op for OpALd { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + legalize_ext_instr(self, b); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xefd8); @@ -1286,6 +1534,10 @@ impl SM50Op for OpALd { } impl SM50Op for OpASt { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + legalize_ext_instr(self, b); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xeff0); @@ -1303,6 +1555,10 @@ impl SM50Op for OpASt { } impl SM50Op for OpMemBar { + fn legalize(&mut self, _b: &mut LegalizeBuilder) { + // Nothing to do + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xef98); @@ -1333,6 +1589,10 @@ impl SM50Encoder<'_> { } impl SM50Op for OpBra { + fn legalize(&mut self, _b: &mut LegalizeBuilder) { + // Nothing to do + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xe240); e.set_rel_offset(20..44, &self.target); @@ -1341,6 +1601,10 @@ impl SM50Op for OpBra { } impl SM50Op for OpExit { + fn legalize(&mut self, _b: &mut LegalizeBuilder) { + // Nothing to do + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xe300); @@ -1350,6 +1614,10 @@ impl SM50Op for OpExit { } impl SM50Op for OpBar { + fn legalize(&mut self, _b: &mut LegalizeBuilder) { + // Nothing to do + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xf0a8); @@ -1371,6 +1639,10 @@ impl SM50Op for OpBar { } impl SM50Op for OpCS2R { + fn legalize(&mut self, _b: &mut LegalizeBuilder) { + // Nothing to do + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0x50c8); e.set_dst(self.dst); @@ -1379,6 +1651,10 @@ impl SM50Op for OpCS2R { } impl SM50Op for OpKill { + fn legalize(&mut self, _b: &mut LegalizeBuilder) { + // Nothing to do + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xe330); e.set_field(0..5, 0x0f_u8); @@ -1386,6 +1662,10 @@ impl SM50Op for OpKill { } impl SM50Op for OpNop { + fn legalize(&mut self, _b: &mut LegalizeBuilder) { + // Nothing to do + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0x50b0); @@ -1395,6 +1675,10 @@ impl SM50Op for OpNop { } impl SM50Op for OpS2R { + fn legalize(&mut self, _b: &mut LegalizeBuilder) { + // Nothing to do + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xf0c8); e.set_dst(self.dst); @@ -1403,6 +1687,10 @@ impl SM50Op for OpS2R { } impl SM50Op for OpPopC { + fn legalize(&mut self, _b: &mut LegalizeBuilder) { + // Nothing to do + } + fn encode(&self, e: &mut SM50Encoder<'_>) { assert!(self.src.is_reg_or_zero()); @@ -1429,6 +1717,13 @@ impl SM50Op for OpPopC { } impl SM50Op for OpFAdd { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1] = &mut self.srcs; + swap_srcs_if_not_reg(src0, src1, GPR); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { if let Some(imm32) = self.srcs[1].as_imm_not_f20() { e.set_opcode(0x0800); @@ -1465,6 +1760,14 @@ impl SM50Op for OpFAdd { } impl SM50Op for OpFMnMx { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1] = &mut self.srcs; + swap_srcs_if_not_reg(src0, src1, GPR); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.srcs[1].src_ref { SrcRef::Imm32(imm32) => { @@ -1490,6 +1793,13 @@ impl SM50Op for OpFMnMx { } impl SM50Op for OpFMul { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1] = &mut self.srcs; + swap_srcs_if_not_reg(src0, src1, GPR); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { if let Some(imm32) = self.srcs[1].as_imm_not_f20() { e.set_opcode(0x1e00); @@ -1539,6 +1849,18 @@ impl SM50Op for OpFMul { } impl SM50Op for OpFFma { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1, src2] = &mut self.srcs; + b.copy_alu_src_if_fabs(src0, SrcType::F32); + b.copy_alu_src_if_fabs(src1, SrcType::F32); + b.copy_alu_src_if_fabs(src2, SrcType::F32); + swap_srcs_if_not_reg(src0, src1, GPR); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); + b.copy_alu_src_if_not_reg(src2, GPR, SrcType::F32); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { // FFMA doesn't have any abs flags. assert!(!self.srcs[0].src_mod.has_fabs()); @@ -1604,6 +1926,16 @@ impl SM50Encoder<'_> { } impl SM50Op for OpFSet { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1] = &mut self.srcs; + if swap_srcs_if_not_reg(src0, src1, GPR) { + self.cmp_op = self.cmp_op.flip(); + } + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.srcs[1].src_ref { SrcRef::Imm32(imm32) => { @@ -1631,6 +1963,16 @@ impl SM50Op for OpFSet { } impl SM50Op for OpFSetP { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1] = &mut self.srcs; + if swap_srcs_if_not_reg(src0, src1, GPR) { + self.cmp_op = self.cmp_op.flip(); + } + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.srcs[1].src_ref { SrcRef::Imm32(imm32) => { @@ -1659,6 +2001,12 @@ impl SM50Op for OpFSetP { } impl SM50Op for OpFSwzAdd { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_not_reg(&mut self.srcs[0], GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut self.srcs[1], GPR, SrcType::GPR); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0x50f8); @@ -1695,6 +2043,11 @@ impl SM50Op for OpFSwzAdd { } impl SM50Op for OpRro { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_f20_overflow(&mut self.src, GPR, SrcType::F32); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.src.src_ref { SrcRef::Imm32(imm32) => { @@ -1724,6 +2077,10 @@ impl SM50Op for OpRro { } impl SM50Op for OpMuFu { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + b.copy_alu_src_if_not_reg(&mut self.src, RegFile::GPR, SrcType::GPR); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { assert!(self.src.is_reg_or_zero()); @@ -1754,6 +2111,10 @@ impl SM50Op for OpMuFu { } impl SM50Op for OpFlo { + fn legalize(&mut self, _b: &mut LegalizeBuilder) { + // Nothing to do + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match self.src.src_ref { SrcRef::Zero | SrcRef::Reg(_) => { @@ -1780,6 +2141,14 @@ impl SM50Op for OpFlo { } impl SM50Op for OpDAdd { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1] = &mut self.srcs; + swap_srcs_if_not_reg(src0, src1, GPR); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.srcs[1].src_ref { SrcRef::Zero | SrcRef::Reg(_) => { @@ -1805,6 +2174,22 @@ impl SM50Op for OpDAdd { } impl SM50Op for OpDFma { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1, src2] = &mut self.srcs; + b.copy_alu_src_if_fabs(src0, SrcType::F64); + b.copy_alu_src_if_fabs(src1, SrcType::F64); + b.copy_alu_src_if_fabs(src2, SrcType::F64); + swap_srcs_if_not_reg(src0, src1, GPR); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); + if src_is_reg(src1, GPR) { + b.copy_alu_src_if_imm(src2, GPR, SrcType::F64); + } else { + b.copy_alu_src_if_not_reg(src2, GPR, SrcType::F64); + } + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.srcs[2].src_ref { SrcRef::Zero | SrcRef::Reg(_) => { @@ -1851,6 +2236,14 @@ impl SM50Op for OpDFma { } impl SM50Op for OpDMnMx { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1] = &mut self.srcs; + swap_srcs_if_not_reg(src0, src1, GPR); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.srcs[1].src_ref { SrcRef::Zero | SrcRef::Reg(_) => { @@ -1875,6 +2268,16 @@ impl SM50Op for OpDMnMx { } impl SM50Op for OpDMul { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1] = &mut self.srcs; + b.copy_alu_src_if_fabs(src0, SrcType::F64); + b.copy_alu_src_if_fabs(src1, SrcType::F64); + swap_srcs_if_not_reg(src0, src1, GPR); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.srcs[1].src_ref { SrcRef::Zero | SrcRef::Reg(_) => { @@ -1908,6 +2311,16 @@ impl SM50Op for OpDMul { } impl SM50Op for OpDSetP { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1] = &mut self.srcs; + if swap_srcs_if_not_reg(src0, src1, GPR) { + self.cmp_op = self.cmp_op.flip(); + } + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.srcs[1].src_ref { SrcRef::Zero | SrcRef::Reg(_) => { @@ -1936,6 +2349,13 @@ impl SM50Op for OpDSetP { } impl SM50Op for OpIAdd2 { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + let [src0, src1] = &mut self.srcs; + swap_srcs_if_not_reg(src0, src1, GPR); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::I32); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { let carry_in = match self.carry_in.src_ref { SrcRef::Reg(reg) if reg.file() == RegFile::Carry => true, @@ -1984,6 +2404,13 @@ impl SM50Op for OpIAdd2 { } impl SM50Op for OpPrmt { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_not_reg(&mut self.srcs[0], GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut self.srcs[1], GPR, SrcType::GPR); + b.copy_alu_src_if_i20_overflow(&mut self.sel, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.sel.src_ref { SrcRef::Imm32(imm) => { @@ -2009,6 +2436,12 @@ impl SM50Op for OpPrmt { } impl SM50Op for OpSuLd { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_not_reg(&mut self.handle, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut self.coord, GPR, SrcType::GPR); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xeb00); @@ -2042,6 +2475,13 @@ impl SM50Op for OpSuLd { } impl SM50Op for OpSuAtom { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_not_reg(&mut self.coord, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut self.handle, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut self.data, GPR, SrcType::GPR); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { if matches!(self.atom_op, AtomOp::CmpExch) { e.set_opcode(0xeac0); @@ -2092,6 +2532,10 @@ impl SM50Op for OpSuAtom { } impl SM50Op for OpIsberd { + fn legalize(&mut self, _b: &mut LegalizeBuilder) { + // Nothing to do + } + fn encode(&self, e: &mut SM50Encoder<'_>) { e.set_opcode(0xefd0); e.set_dst(self.dst); @@ -2100,6 +2544,12 @@ impl SM50Op for OpIsberd { } impl SM50Op for OpOut { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_not_reg(&mut self.handle, GPR, SrcType::GPR); + b.copy_alu_src_if_i20_overflow(&mut self.stream, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.stream.src_ref { SrcRef::Imm32(imm32) => { @@ -2132,6 +2582,11 @@ impl SM50Op for OpOut { } impl SM50Op for OpBfe { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + use RegFile::GPR; + b.copy_alu_src_if_not_reg(&mut self.base, GPR, SrcType::ALU); + } + fn encode(&self, e: &mut SM50Encoder<'_>) { match &self.range.src_ref { SrcRef::Imm32(imm32) => { @@ -2238,6 +2693,10 @@ fn as_sm50_op(op: &Op) -> &dyn SM50Op { as_sm50_op_match!(op) } +fn as_sm50_op_mut(op: &mut Op) -> &mut dyn SM50Op { + as_sm50_op_match!(op) +} + fn encode_instr( instr_index: usize, instr: Option<&Box>, @@ -2341,3 +2800,7 @@ fn encode_sm50_shader(sm: &ShaderModel50, s: &Shader<'_>) -> Vec { encoded } + +pub fn legalize_sm50_instr(b: &mut LegalizeBuilder, instr: &mut Instr) { + as_sm50_op_mut(&mut instr.op).legalize(b); +}