diff --git a/src/nouveau/compiler/nak_encode_tu102.rs b/src/nouveau/compiler/nak_encode_tu102.rs
index d0e0460c318..1d7c9cdafa7 100644
--- a/src/nouveau/compiler/nak_encode_tu102.rs
+++ b/src/nouveau/compiler/nak_encode_tu102.rs
@@ -225,6 +225,65 @@ fn encode_ast(bs: &mut impl BitSetMut, instr: &Instr, attr: &AttrAccess) {
     assert!(!attr.out_load);
 }
 
+fn encode_mem_access(bs: &mut impl BitSetMut, access: &MemAccess) {
+    bs.set_field(
+        72..73,
+        match access.addr_type {
+            MemAddrType::A32 => 0_u8,
+            MemAddrType::A64 => 1_u8,
+        },
+    );
+    bs.set_field(
+        73..76,
+        match access.mem_type {
+            MemType::U8 => 0_u8,
+            MemType::I8 => 1_u8,
+            MemType::U16 => 2_u8,
+            MemType::I16 => 3_u8,
+            MemType::B32 => 4_u8,
+            MemType::B64 => 5_u8,
+            MemType::B128 => 6_u8,
+        },
+    );
+    bs.set_field(
+        77..79,
+        match access.scope {
+            MemScope::CTA => 0_u8,
+            MemScope::Cluster => 1_u8,
+            MemScope::GPU => 2_u8,
+            MemScope::System => 3_u8,
+        },
+    );
+    bs.set_field(
+        79..81,
+        match access.order {
+            /* Constant => 0_u8, */
+            /* Weak? => 1_u8, */
+            MemOrder::Strong => 2_u8,
+            /* MMIO => 3_u8, */
+        },
+    );
+}
+
+fn encode_ld(bs: &mut impl BitSetMut, instr: &Instr, access: &MemAccess) {
+    encode_instr_base(bs, &instr, 0x980);
+
+    encode_reg(bs, 24..32, *instr.src(0).as_reg().unwrap());
+    bs.set_field(32..64, 0_u32 /* Immediate offset */);
+
+    encode_mem_access(bs, access);
+}
+
+fn encode_st(bs: &mut impl BitSetMut, instr: &Instr, access: &MemAccess) {
+    encode_instr_base(bs, &instr, 0x385);
+
+    encode_reg(bs, 24..32, *instr.src(0).as_reg().unwrap());
+    bs.set_field(32..64, 0_u32 /* Immediate offset */);
+    encode_reg(bs, 64..72, *instr.src(1).as_reg().unwrap());
+
+    encode_mem_access(bs, access);
+}
+
 fn encode_exit(bs: &mut impl BitSetMut, instr: &Instr) {
     encode_instr_base(bs, instr, 0x94d);
 
@@ -242,6 +301,8 @@ pub fn encode_instr(instr: &Instr) -> [u32; 4] {
         Opcode::MOV => encode_mov(&mut bs, instr),
         Opcode::ALD(a) => encode_ald(&mut bs, instr, &a),
         Opcode::AST(a) => encode_ast(&mut bs, instr, &a),
+        Opcode::LD(a) => encode_ld(&mut bs, instr, a),
+        Opcode::ST(a) => encode_st(&mut bs, instr, a),
         Opcode::EXIT => encode_exit(&mut bs, instr),
         _ => panic!("Unhandled instruction"),
     }
diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs
index 6ceeeb1f50c..a0198ddd89a 100644
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -108,6 +108,20 @@ impl<'a> ShaderFromNir<'a> {
     fn parse_intrinsic(&mut self, intrin: &nir_intrinsic_instr) {
         let srcs = intrin.srcs_as_slice();
         match intrin.intrinsic {
+            nir_intrinsic_load_global => {
+                let size_B =
+                    (intrin.def.bit_size() / 8) * intrin.def.num_components();
+                assert!(u32::from(size_B) <= intrin.align());
+                let access = MemAccess {
+                    addr_type: MemAddrType::A64,
+                    mem_type: MemType::from_size(size_B, false),
+                    order: MemOrder::Strong,
+                    scope: MemScope::System,
+                };
+                let addr = self.get_src(&srcs[0]);
+                let dst = self.get_dst(&intrin.def);
+                self.instrs.push(Instr::new_ld(dst, access, addr));
+            }
             nir_intrinsic_load_input => {
                 let addr = u16::try_from(intrin.base()).unwrap();
                 let vtx = Src::new_zero();
@@ -152,6 +166,20 @@ impl<'a> ShaderFromNir<'a> {
                     panic!("Indirect UBO indices not yet supported");
                 }
             }
+            nir_intrinsic_store_global => {
+                let data = self.get_src(&srcs[0]);
+                let size_B =
+                    (srcs[0].bit_size() / 8) * srcs[0].num_components();
+                assert!(u32::from(size_B) <= intrin.align());
+                let access = MemAccess {
+                    addr_type: MemAddrType::A64,
+                    mem_type: MemType::from_size(size_B, false),
+                    order: MemOrder::Strong,
+                    scope: MemScope::System,
+                };
+                let addr = self.get_src(&srcs[1]);
+                self.instrs.push(Instr::new_st(access, addr, data));
+            }
             nir_intrinsic_store_output => {
                 if self.nir.info.stage() == MESA_SHADER_FRAGMENT {
                     /* We assume these only ever happen in the last block.
diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs
index 1e08053f492..2d1fe188521 100644
--- a/src/nouveau/compiler/nak_ir.rs
+++ b/src/nouveau/compiler/nak_ir.rs
@@ -394,6 +394,120 @@ pub struct AttrAccess {
     pub flags: u8,
 }
 
+pub enum MemAddrType {
+    A32,
+    A64,
+}
+
+impl fmt::Display for MemAddrType {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            MemAddrType::A32 => write!(f, "A32"),
+            MemAddrType::A64 => write!(f, "A64"),
+        }
+    }
+}
+
+pub enum MemType {
+    U8,
+    I8,
+    U16,
+    I16,
+    B32,
+    B64,
+    B128,
+}
+
+impl MemType {
+    pub fn from_size(size: u8, is_signed: bool) -> MemType {
+        match size {
+            1 => {
+                if is_signed {
+                    MemType::I8
+                } else {
+                    MemType::U8
+                }
+            }
+            2 => {
+                if is_signed {
+                    MemType::I16
+                } else {
+                    MemType::U16
+                }
+            }
+            4 => MemType::B32,
+            8 => MemType::B64,
+            16 => MemType::B128,
+            _ => panic!("Invalid memory load/store size"),
+        }
+    }
+}
+
+impl fmt::Display for MemType {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            MemType::U8 => write!(f, "U8"),
+            MemType::I8 => write!(f, "I8"),
+            MemType::U16 => write!(f, "U16"),
+            MemType::I16 => write!(f, "I16"),
+            MemType::B32 => write!(f, "B32"),
+            MemType::B64 => write!(f, "B64"),
+            MemType::B128 => write!(f, "B128"),
+        }
+    }
+}
+
+pub enum MemOrder {
+    Strong,
+}
+
+pub enum MemScope {
+    CTA,
+    Cluster,
+    GPU,
+    System,
+}
+
+impl fmt::Display for MemScope {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            MemScope::CTA => write!(f, "CTA"),
+            MemScope::Cluster => write!(f, "SM"),
+            MemScope::GPU => write!(f, "GPU"),
+            MemScope::System => write!(f, "SYS"),
+        }
+    }
+}
+
+pub enum MemSpace {
+    Global,
+    Local,
+    Shared,
+}
+
+impl fmt::Display for MemSpace {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            MemSpace::Global => write!(f, "GLOBAL"),
+            MemSpace::Local => write!(f, "LOCAL"),
+            MemSpace::Shared => write!(f, "SHARED"),
+        }
+    }
+}
+
+pub struct MemAccess {
+    pub addr_type: MemAddrType,
+    pub mem_type: MemType,
+    pub order: MemOrder,
+    pub scope: MemScope,
+}
+
+impl fmt::Display for MemAccess {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}.{}.{}", self.addr_type, self.mem_type, self.scope)
+    }
+}
+
 const MIN_INSTR_DELAY: u8 = 1;
 const MAX_INSTR_DELAY: u8 = 15;
 
@@ -563,6 +677,18 @@ impl Instr {
         instr
     }
 
+    pub fn new_ld(dst: Dst, access: MemAccess, addr: Src) -> Instr {
+        Instr::new(
+            Opcode::LD(access),
+            slice::from_ref(&dst),
+            slice::from_ref(&addr),
+        )
+    }
+
+    pub fn new_st(access: MemAccess, addr: Src, data: Src) -> Instr {
+        Instr::new(Opcode::ST(access), &[], &[addr, data])
+    }
+
     pub fn new_fs_out(srcs: &[Src]) -> Instr {
         Instr::new(Opcode::FS_OUT, &[], srcs)
     }
@@ -617,7 +743,9 @@ impl Instr {
 
     pub fn can_eliminate(&self) -> bool {
         match self.op {
-            Opcode::FS_OUT | Opcode::EXIT | Opcode::AST(_) => false,
+            Opcode::FS_OUT | Opcode::EXIT | Opcode::AST(_) | Opcode::ST(_) => {
+                false
+            }
             _ => true,
         }
     }
@@ -658,6 +786,8 @@ pub enum Opcode {
 
     ALD(AttrAccess),
     AST(AttrAccess),
+    LD(MemAccess),
+    ST(MemAccess),
 
     FS_OUT,
 
@@ -679,6 +809,8 @@ impl fmt::Display for Opcode {
             Opcode::SPLIT => write!(f, "SPLIT"),
             Opcode::ALD(_) => write!(f, "ALD"),
             Opcode::AST(_) => write!(f, "AST"),
+            Opcode::LD(a) => write!(f, "LD.{}", a),
+            Opcode::ST(a) => write!(f, "ST.{}", a),
             Opcode::FS_OUT => write!(f, "FS_OUT"),
             Opcode::EXIT => write!(f, "EXIT"),
         }