nak: Add a source barrier intrinsic

This just inserts a GPU stall until the given source is available.  We
need this in order to properly implement shader clock.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27303>
This commit is contained in:
Faith Ekstrand 2024-01-26 10:16:12 -06:00 committed by Marge Bot
parent d57c79846d
commit 48ebfeba34
4 changed files with 27 additions and 2 deletions

View file

@ -2074,6 +2074,9 @@ intrinsic("bar_break_nv", dest_comp=1, bit_sizes=[32], src_comp=[1])
# src[] = { bar, bar_set }
intrinsic("bar_sync_nv", src_comp=[1, 1])
# Stall until the given SSA value is available
# src[] = { value }
intrinsic("ssa_bar_nv", src_comp=[1])
# NVIDIA-specific system values
system_value("warps_per_sm_nv", 1, bit_sizes=[32])
system_value("sm_count_nv", 1, bit_sizes=[32])

View file

@ -512,8 +512,11 @@ fn calc_delays(f: &mut Function, sm: u8) {
// after every instruction which has an exec latency. Perhaps it has
// something to do with .yld? In any case, the extra 2 cycles aren't worth
// the chance of weird bugs.
f.map_instrs(|instr, _| {
if instr.get_exec_latency(sm) > 1 {
f.map_instrs(|mut instr, _| {
if matches!(instr.op, Op::SrcBar(_)) {
instr.op = Op::Nop(OpNop { label: None });
MappedInstrs::One(instr)
} else if instr.get_exec_latency(sm) > 1 {
let mut nop = Instr::new_boxed(OpNop { label: None });
nop.deps.set_delay(2);
MappedInstrs::Many(vec![instr, nop])

View file

@ -2524,6 +2524,10 @@ impl<'a> ShaderFromNir<'a> {
});
self.set_dst(&intrin.def, dst);
}
nir_intrinsic_ssa_bar_nv => {
// Translate the NIR source barrier into an OpSrcBar on the same
// source. The intrinsic produces no value, so no destination is set.
let src = self.get_src(&srcs[0]);
b.push_op(OpSrcBar { src });
}
nir_intrinsic_store_global => {
let data = self.get_src(&srcs[0]);
let size_B =

View file

@ -4427,6 +4427,19 @@ impl DisplayOp for OpUndef {
}
impl_display_for_op!(OpUndef);
/// Source barrier op: stalls until `src` is available.
///
/// It is emitted for the `ssa_bar_nv` NIR intrinsic and has no destination.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpSrcBar {
/// The value whose availability is waited on.
pub src: Src,
}
impl DisplayOp for OpSrcBar {
    /// Prints the op as the `src_bar` mnemonic followed by its single source.
    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { src: waited_on } = self;
        f.write_fmt(format_args!("src_bar {}", waited_on))
    }
}
impl_display_for_op!(OpSrcBar);
pub struct VecPair<A, B> {
a: Vec<A>,
b: Vec<B>,
@ -4877,6 +4890,7 @@ pub enum Op {
S2R(OpS2R),
Vote(OpVote),
Undef(OpUndef),
SrcBar(OpSrcBar),
PhiSrcs(OpPhiSrcs),
PhiDsts(OpPhiDsts),
Copy(OpCopy),
@ -5328,6 +5342,7 @@ impl Instr {
// Virtual ops
Op::Undef(_)
| Op::SrcBar(_)
| Op::PhiSrcs(_)
| Op::PhiDsts(_)
| Op::Copy(_)