diff --git a/src/nouveau/compiler/nak.h b/src/nouveau/compiler/nak.h index 604377b06de..fca0411e1a3 100644 --- a/src/nouveau/compiler/nak.h +++ b/src/nouveau/compiler/nak.h @@ -39,6 +39,9 @@ struct nak_shader_info { /** Number of GPRs used */ uint8_t num_gprs; + /** Number of barriers used */ + uint8_t num_barriers; + /** Size of thread-local storage */ uint32_t tls_size; diff --git a/src/nouveau/compiler/nak.rs b/src/nouveau/compiler/nak.rs index 743da82f0a7..6bca71bc779 100644 --- a/src/nouveau/compiler/nak.rs +++ b/src/nouveau/compiler/nak.rs @@ -461,6 +461,11 @@ pub extern "C" fn nak_compile_shader( let info = nak_shader_info { stage: nir.info.stage(), num_gprs: 255, + num_barriers: if nir.info.uses_control_barrier() { + 1 + } else { + 0 + }, tls_size: nir.scratch_size, cs: nak_shader_info__bindgen_ty_1 { local_size: [ diff --git a/src/nouveau/compiler/nak_calc_instr_deps.rs b/src/nouveau/compiler/nak_calc_instr_deps.rs index d7efd4a7cd3..62ea197dd40 100644 --- a/src/nouveau/compiler/nak_calc_instr_deps.rs +++ b/src/nouveau/compiler/nak_calc_instr_deps.rs @@ -324,7 +324,9 @@ impl Shader { for b in &mut f.blocks.iter_mut().rev() { let mut wt = 0_u8; for (i, instr) in &mut b.instrs.iter_mut().enumerate() { - if instr.is_branch() { + if instr.is_barrier() { + instr.deps.set_yield(true); + } else if instr.is_branch() { instr.deps.add_wt_bar_mask(0x3f); } else { instr.deps.add_wt_bar_mask(wt); diff --git a/src/nouveau/compiler/nak_encode_sm75.rs b/src/nouveau/compiler/nak_encode_sm75.rs index 1b0b5501813..44a3bab1a70 100644 --- a/src/nouveau/compiler/nak_encode_sm75.rs +++ b/src/nouveau/compiler/nak_encode_sm75.rs @@ -1178,6 +1178,22 @@ impl SM75Instr { self.set_pred_dst(81..84, Dst::None); } + fn encode_membar(&mut self, op: &OpMemBar) { + self.set_opcode(0x992); + + self.set_bit(72, false); /* !.MMIO */ + self.set_field( + 76..79, + match op.scope { + MemScope::CTA => 0_u8, + MemScope::Cluster => 1_u8, + MemScope::GPU => 2_u8, + MemScope::System => 3_u8, + }, + ); + self.set_bit(80, false); /* .SC */ + } + fn encode_bra( &mut self, op: &OpBra, @@ -1210,6 +1226,30 @@ impl SM75Instr { self.set_field(90..91, false); /* NOT */ } + fn encode_bar(&mut self, op: &OpBar) { + self.set_opcode(0x31d); + + /* src0 == src1 */ + self.set_reg_src(32..40, SrcRef::Zero.into()); + + /* + * 00: RED.POPC + * 01: RED.AND + * 02: RED.OR + */ + self.set_field(74..76, 0_u8); + + /* + * 00: SYNC + * 01: ARV + * 02: RED + * 03: SCAN + */ + self.set_field(77..79, 0_u8); + + self.set_pred_src(87..90, 90, SrcRef::True.into()); + } + fn encode_s2r(&mut self, op: &OpS2R) { self.set_opcode(0x919); self.set_dst(op.dst); @@ -1263,8 +1303,10 @@ impl SM75Instr { Op::ALd(op) => si.encode_ald(&op), Op::ASt(op) => si.encode_ast(&op), Op::Ipa(op) => si.encode_ipa(&op), + Op::MemBar(op) => si.encode_membar(&op), Op::Bra(op) => si.encode_bra(&op, ip, block_offsets), Op::Exit(op) => si.encode_exit(&op), + Op::Bar(op) => si.encode_bar(&op), Op::S2R(op) => si.encode_s2r(&op), _ => panic!("Unhandled instruction"), } diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs index 1d0b9a375a9..bc004c0dd17 100644 --- a/src/nouveau/compiler/nak_from_nir.rs +++ b/src/nouveau/compiler/nak_from_nir.rs @@ -950,6 +950,21 @@ impl<'a> ShaderFromNir<'a> { panic!("Indirect UBO indices not yet supported"); } } + nir_intrinsic_barrier => { + if intrin.memory_scope() != SCOPE_NONE { + let mem_scope = match intrin.memory_scope() { + SCOPE_INVOCATION | SCOPE_SUBGROUP => MemScope::CTA, + SCOPE_WORKGROUP | SCOPE_QUEUE_FAMILY | SCOPE_DEVICE => { + MemScope::GPU + } + _ => panic!("Unhandled memory scope"), + }; + self.instrs.push(OpMemBar { scope: mem_scope }.into()); + } + if intrin.execution_scope() != SCOPE_NONE { + self.instrs.push(OpBar {}.into()); + } + } nir_intrinsic_store_global => { let data = self.get_src(&srcs[0]); let size_B = diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs index e1ff4239bdf..fc84ccb328e 100644 --- a/src/nouveau/compiler/nak_ir.rs +++ b/src/nouveau/compiler/nak_ir.rs @@ -2123,6 +2123,18 @@ impl fmt::Display for OpIpa { } } +#[repr(C)] +#[derive(SrcsAsSlice, DstsAsSlice)] +pub struct OpMemBar { + pub scope: MemScope, +} + +impl fmt::Display for OpMemBar { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "MEMBAR.SC.{}", self.scope) + } +} + #[repr(C)] #[derive(SrcsAsSlice, DstsAsSlice)] pub struct OpBra { @@ -2145,6 +2157,16 @@ impl fmt::Display for OpExit { } } +#[repr(C)] +#[derive(SrcsAsSlice, DstsAsSlice)] +pub struct OpBar {} + +impl fmt::Display for OpBar { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "BAR.SYNC") + } +} + #[repr(C)] #[derive(SrcsAsSlice, DstsAsSlice)] pub struct OpS2R { @@ -2474,8 +2496,10 @@ pub enum Op { ALd(OpALd), ASt(OpASt), Ipa(OpIpa), + MemBar(OpMemBar), Bra(OpBra), Exit(OpExit), + Bar(OpBar), S2R(OpS2R), Undef(OpUndef), FMov(OpFMov), @@ -2920,13 +2944,22 @@ impl Instr { } } + pub fn is_barrier(&self) -> bool { + match self.op { + Op::Bar(_) => true, + _ => false, + } + } + pub fn can_eliminate(&self) -> bool { match self.op { Op::ASt(_) | Op::SuSt(_) | Op::St(_) + | Op::MemBar(_) | Op::Bra(_) | Op::Exit(_) + | Op::Bar(_) | Op::FSOut(_) => false, _ => true, } @@ -2966,6 +2999,8 @@ impl Instr { Op::SuSt(_) => None, Op::Ld(_) => None, Op::St(_) => None, + Op::MemBar(_) => None, + Op::Bar(_) => None, Op::Bra(_) | Op::Exit(_) => Some(15), Op::Undef(_) | Op::FMov(_)