diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index fe86600ce30..9fdb313fad3 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -2074,6 +2074,9 @@ intrinsic("bar_break_nv", dest_comp=1, bit_sizes=[32], src_comp=[1])
# src[] = { bar, bar_set }
intrinsic("bar_sync_nv", src_comp=[1, 1])
+# Stall until the given SSA value is available
+# src[] = { value }
+intrinsic("ssa_bar_nv", src_comp=[1])
+
# NVIDIA-specific system values
system_value("warps_per_sm_nv", 1, bit_sizes=[32])
system_value("sm_count_nv", 1, bit_sizes=[32])
diff --git a/src/nouveau/compiler/nak/calc_instr_deps.rs b/src/nouveau/compiler/nak/calc_instr_deps.rs
index e4f0cdf5f70..91334be1123 100644
--- a/src/nouveau/compiler/nak/calc_instr_deps.rs
+++ b/src/nouveau/compiler/nak/calc_instr_deps.rs
@@ -512,8 +512,11 @@ fn calc_delays(f: &mut Function, sm: u8) {
// after every instruction which has an exec latency. Perhaps it has
// something to do with .yld? In any case, the extra 2 cycles aren't worth
// the chance of weird bugs.
- f.map_instrs(|instr, _| {
- if instr.get_exec_latency(sm) > 1 {
+ f.map_instrs(|mut instr, _| {
+ if matches!(instr.op, Op::SrcBar(_)) {
+ instr.op = Op::Nop(OpNop { label: None });
+ MappedInstrs::One(instr)
+ } else if instr.get_exec_latency(sm) > 1 {
let mut nop = Instr::new_boxed(OpNop { label: None });
nop.deps.set_delay(2);
MappedInstrs::Many(vec![instr, nop])
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index 0a52f4d77d7..34209ba421a 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -2524,6 +2524,10 @@ impl<'a> ShaderFromNir<'a> {
});
self.set_dst(&intrin.def, dst);
}
+ nir_intrinsic_ssa_bar_nv => {
+ let src = self.get_src(&srcs[0]);
+ b.push_op(OpSrcBar { src });
+ }
nir_intrinsic_store_global => {
let data = self.get_src(&srcs[0]);
let size_B =
diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index 8be33318fc6..5cb133f12c9 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -4427,6 +4427,19 @@ impl DisplayOp for OpUndef {
}
impl_display_for_op!(OpUndef);
+/// Source barrier: a virtual op that carries a single source and has no
+/// destination field.
+///
+/// This is what the NIR `ssa_bar_nv` intrinsic ("stall until the given SSA
+/// value is available") is translated into. `calc_delays` replaces the op
+/// with a plain `OpNop`, leaving the rest of the instruction untouched.
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpSrcBar {
+/// The value to wait on (the single source of `ssa_bar_nv`).
+pub src: Src,
+}
+
+impl DisplayOp for OpSrcBar {
+    /// Prints the op as the mnemonic `src_bar` followed by its source.
+    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Self { src } = self;
+        // Same call that `write!(f, ...)` expands to, spelled out explicitly.
+        f.write_fmt(format_args!("src_bar {}", src))
+    }
+}
+impl_display_for_op!(OpSrcBar);
+
pub struct VecPair {
a: Vec,
b: Vec,
@@ -4877,6 +4890,7 @@ pub enum Op {
S2R(OpS2R),
Vote(OpVote),
Undef(OpUndef),
+ SrcBar(OpSrcBar),
PhiSrcs(OpPhiSrcs),
PhiDsts(OpPhiDsts),
Copy(OpCopy),
@@ -5328,6 +5342,7 @@ impl Instr {
// Virtual ops
Op::Undef(_)
+ | Op::SrcBar(_)
| Op::PhiSrcs(_)
| Op::PhiDsts(_)
| Op::Copy(_)