diff --git a/src/nouveau/compiler/nak.rs b/src/nouveau/compiler/nak.rs
index 3e11d2bbcb8..65ac9e1d24b 100644
--- a/src/nouveau/compiler/nak.rs
+++ b/src/nouveau/compiler/nak.rs
@@ -276,11 +276,7 @@ pub extern "C" fn nak_compile_shader(
     let info = nak_shader_info {
         stage: nir.info.stage(),
         num_gprs: s.info.num_gprs,
-        num_barriers: if nir.info.uses_control_barrier() {
-            1
-        } else {
-            0
-        },
+        num_barriers: s.info.num_barriers,
         tls_size: s.info.tls_size,
         __bindgen_anon_1: match &s.info.stage {
             ShaderStageInfo::Compute(cs_info) => {
diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs
index 12d02f02efe..092d9ca6329 100644
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -23,6 +23,7 @@ fn init_info_from_nir(nir: &nir_shader, sm: u8) -> ShaderInfo {
     ShaderInfo {
         sm: sm,
         num_gprs: 0,
+        num_barriers: (nir.info.shared_size > 0).into(),
         tls_size: nir.scratch_size,
         uses_global_mem: false,
         writes_global_mem: false,
@@ -1629,8 +1630,13 @@ impl<'a> ShaderFromNir<'a> {
                     };
                     b.push_op(OpMemBar { scope: mem_scope });
                 }
-                if intrin.execution_scope() != SCOPE_NONE {
-                    b.push_op(OpBar {});
+                match intrin.execution_scope() {
+                    SCOPE_NONE => (),
+                    SCOPE_WORKGROUP => {
+                        b.push_op(OpWarpSync { mask: u32::MAX });
+                        b.push_op(OpBar {}).deps.yld = true;
+                    }
+                    _ => panic!("Unhandled execution scope"),
                 }
             }
             nir_intrinsic_shared_atomic => {
@@ -1920,6 +1926,35 @@ impl<'a> ShaderFromNir<'a> {
     ) {
         let mut b = SSAInstrBuilder::new(ssa_alloc);
 
+        if nb.index == 0 && self.nir.info.shared_size > 0 {
+            // The blob seems to always do a BSYNC before accessing shared
+            // memory. Perhaps this is to ensure that our allocation is
+            // actually available and not in use by another thread?
+            let label = self.label_alloc.alloc();
+            let bar = BarRef::new(0);
+            let bmov = b.push_op(OpBMov {
+                dst: Dst::None,
+                src: BMovSrc::Barrier(bar),
+                clear: true,
+            });
+            bmov.deps.yld = true;
+
+            let bssy = b.push_op(OpBSSy {
+                bar: bar,
+                cond: SrcRef::True.into(),
+                target: label,
+            });
+            bssy.deps.yld = true;
+
+            let bsync = b.push_op(OpBSync {
+                bar: bar,
+                cond: SrcRef::True.into(),
+            });
+            bsync.deps.yld = true;
+
+            b.push_op(OpNop { label: Some(label) });
+        }
+
         let mut phi = OpPhiDsts::new();
         for ni in nb.iter_instr_list() {
             if ni.type_ == nir_instr_type_phi {
diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs
index 64981397257..7332cc52931 100644
--- a/src/nouveau/compiler/nak_ir.rs
+++ b/src/nouveau/compiler/nak_ir.rs
@@ -4193,6 +4193,9 @@ impl fmt::Display for InstrDeps {
         if self.reuse_mask != 0 {
             write!(f, " reuse={:06b}", self.reuse_mask)?;
         }
+        if self.yld {
+            write!(f, " yld")?;
+        }
         Ok(())
     }
 }
@@ -4787,6 +4790,7 @@ pub enum ShaderIoInfo {
 pub struct ShaderInfo {
     pub sm: u8,
     pub num_gprs: u8,
+    pub num_barriers: u8,
     pub tls_size: u32,
     pub uses_global_mem: bool,
     pub writes_global_mem: bool,
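
Note on the new num_barriers computation (not part of the patch): `(nir.info.shared_size > 0).into()` relies on Rust's `impl From<bool> for u8`, so a shader that declares any shared memory reserves exactly one HW barrier and all others reserve none. A minimal standalone sketch of that conversion follows; `shared_size` here is a made-up stand-in for nir.info.shared_size.

    // Illustration only: mirrors the bool-to-u8 conversion used to fill
    // ShaderInfo::num_barriers in init_info_from_nir above.
    fn num_barriers_for(shared_size: u32) -> u8 {
        (shared_size > 0).into()
    }

    fn main() {
        assert_eq!(num_barriers_for(0), 0);
        assert_eq!(num_barriers_for(1024), 1);
        println!("num_barriers = {}", num_barriers_for(1024));
    }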