mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-27 05:30:24 +01:00
nak: Add a source barrier intrinsic
This just inserts a GPU stall until the given source is available. We need this in order to implement shader clock properly.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27303>
This commit is contained in:
parent
d57c79846d
commit
48ebfeba34
4 changed files with 27 additions and 2 deletions
|
|
@ -2074,6 +2074,9 @@ intrinsic("bar_break_nv", dest_comp=1, bit_sizes=[32], src_comp=[1])
|
|||
# src[] = { bar, bar_set }
|
||||
intrinsic("bar_sync_nv", src_comp=[1, 1])
|
||||
|
||||
# Stall until the given SSA value is available
|
||||
intrinsic("ssa_bar_nv", src_comp=[1])
|
||||
|
||||
# NVIDIA-specific system values
|
||||
system_value("warps_per_sm_nv", 1, bit_sizes=[32])
|
||||
system_value("sm_count_nv", 1, bit_sizes=[32])
|
||||
|
|
|
|||
|
|
@ -512,8 +512,11 @@ fn calc_delays(f: &mut Function, sm: u8) {
|
|||
// after every instruction which has an exec latency. Perhaps it has
|
||||
// something to do with .yld? In any case, the extra 2 cycles aren't worth
|
||||
// the chance of weird bugs.
|
||||
f.map_instrs(|instr, _| {
|
||||
if instr.get_exec_latency(sm) > 1 {
|
||||
f.map_instrs(|mut instr, _| {
|
||||
if matches!(instr.op, Op::SrcBar(_)) {
|
||||
instr.op = Op::Nop(OpNop { label: None });
|
||||
MappedInstrs::One(instr)
|
||||
} else if instr.get_exec_latency(sm) > 1 {
|
||||
let mut nop = Instr::new_boxed(OpNop { label: None });
|
||||
nop.deps.set_delay(2);
|
||||
MappedInstrs::Many(vec![instr, nop])
|
||||
|
|
|
|||
|
|
@ -2524,6 +2524,10 @@ impl<'a> ShaderFromNir<'a> {
|
|||
});
|
||||
self.set_dst(&intrin.def, dst);
|
||||
}
|
||||
nir_intrinsic_ssa_bar_nv => {
|
||||
let src = self.get_src(&srcs[0]);
|
||||
b.push_op(OpSrcBar { src });
|
||||
}
|
||||
nir_intrinsic_store_global => {
|
||||
let data = self.get_src(&srcs[0]);
|
||||
let size_B =
|
||||
|
|
|
|||
|
|
@ -4427,6 +4427,19 @@ impl DisplayOp for OpUndef {
|
|||
}
|
||||
impl_display_for_op!(OpUndef);
|
||||
|
||||
/// Source barrier op.
///
/// Carries a single source and produces no destination. Per the commit
/// that introduces it, its purpose is to stall until the given source is
/// available. It is grouped with the virtual ops (`Undef`, `PhiSrcs`,
/// `PhiDsts`, `Copy`) elsewhere in this change, and `calc_delays` rewrites
/// it into an `OpNop`.
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpSrcBar {
|
||||
/// The value to wait on.
pub src: Src,
|
||||
}
|
||||
|
||||
// Textual form of `OpSrcBar`: the mnemonic `src_bar` followed by the
// source operand, e.g. `src_bar <src>`.
impl DisplayOp for OpSrcBar {
|
||||
// Writes the op mnemonic and its single source to `f`, returning the
// result of the underlying `write!`.
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "src_bar {}", self.src)
|
||||
}
|
||||
}
|
||||
impl_display_for_op!(OpSrcBar);
|
||||
|
||||
pub struct VecPair<A, B> {
|
||||
a: Vec<A>,
|
||||
b: Vec<B>,
|
||||
|
|
@ -4877,6 +4890,7 @@ pub enum Op {
|
|||
S2R(OpS2R),
|
||||
Vote(OpVote),
|
||||
Undef(OpUndef),
|
||||
SrcBar(OpSrcBar),
|
||||
PhiSrcs(OpPhiSrcs),
|
||||
PhiDsts(OpPhiDsts),
|
||||
Copy(OpCopy),
|
||||
|
|
@ -5328,6 +5342,7 @@ impl Instr {
|
|||
|
||||
// Virtual ops
|
||||
Op::Undef(_)
|
||||
| Op::SrcBar(_)
|
||||
| Op::PhiSrcs(_)
|
||||
| Op::PhiDsts(_)
|
||||
| Op::Copy(_)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue