From 48ebfeba34c5e3845b0d3693d7841fb4becc2ec3 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand
Date: Fri, 26 Jan 2024 10:16:12 -0600
Subject: [PATCH] nak: Add a source barrier intrinsic

This just inserts a GPU stall until the given source is available. We
need this in order to properly implement shader clock.

Part-of:
---
 src/compiler/nir/nir_intrinsics.py          |  3 +++
 src/nouveau/compiler/nak/calc_instr_deps.rs |  7 +++++--
 src/nouveau/compiler/nak/from_nir.rs        |  4 ++++
 src/nouveau/compiler/nak/ir.rs              | 15 +++++++++++++++
 4 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index fe86600ce30..9fdb313fad3 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -2074,6 +2074,9 @@ intrinsic("bar_break_nv", dest_comp=1, bit_sizes=[32], src_comp=[1])
 # src[] = { bar, bar_set }
 intrinsic("bar_sync_nv", src_comp=[1, 1])
 
+# Stall until the given SSA value is available
+intrinsic("ssa_bar_nv", src_comp=[1])
+
 # NVIDIA-specific system values
 system_value("warps_per_sm_nv", 1, bit_sizes=[32])
 system_value("sm_count_nv", 1, bit_sizes=[32])
diff --git a/src/nouveau/compiler/nak/calc_instr_deps.rs b/src/nouveau/compiler/nak/calc_instr_deps.rs
index e4f0cdf5f70..91334be1123 100644
--- a/src/nouveau/compiler/nak/calc_instr_deps.rs
+++ b/src/nouveau/compiler/nak/calc_instr_deps.rs
@@ -512,8 +512,11 @@ fn calc_delays(f: &mut Function, sm: u8) {
     // after every instruction which has an exec latency. Perhaps it has
     // something to do with .yld? In any case, the extra 2 cycles aren't worth
     // the chance of weird bugs.
-    f.map_instrs(|instr, _| {
-        if instr.get_exec_latency(sm) > 1 {
+    f.map_instrs(|mut instr, _| {
+        if matches!(instr.op, Op::SrcBar(_)) {
+            instr.op = Op::Nop(OpNop { label: None });
+            MappedInstrs::One(instr)
+        } else if instr.get_exec_latency(sm) > 1 {
             let mut nop = Instr::new_boxed(OpNop { label: None });
             nop.deps.set_delay(2);
             MappedInstrs::Many(vec![instr, nop])
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index 0a52f4d77d7..34209ba421a 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -2524,6 +2524,10 @@ impl<'a> ShaderFromNir<'a> {
                 });
                 self.set_dst(&intrin.def, dst);
             }
+            nir_intrinsic_ssa_bar_nv => {
+                let src = self.get_src(&srcs[0]);
+                b.push_op(OpSrcBar { src });
+            }
             nir_intrinsic_store_global => {
                 let data = self.get_src(&srcs[0]);
                 let size_B =
diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index 8be33318fc6..5cb133f12c9 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -4427,6 +4427,19 @@ impl DisplayOp for OpUndef {
 }
 impl_display_for_op!(OpUndef);
 
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpSrcBar {
+    pub src: Src,
+}
+
+impl DisplayOp for OpSrcBar {
+    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "src_bar {}", self.src)
+    }
+}
+impl_display_for_op!(OpSrcBar);
+
 pub struct VecPair {
     a: Vec,
     b: Vec,
@@ -4877,6 +4890,7 @@ pub enum Op {
     S2R(OpS2R),
     Vote(OpVote),
     Undef(OpUndef),
+    SrcBar(OpSrcBar),
     PhiSrcs(OpPhiSrcs),
     PhiDsts(OpPhiDsts),
     Copy(OpCopy),
@@ -5328,6 +5342,7 @@ impl Instr {
 
             // Virtual ops
             Op::Undef(_)
+            | Op::SrcBar(_)
             | Op::PhiSrcs(_)
             | Op::PhiDsts(_)
             | Op::Copy(_)