diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 11e5533689a..918d527ba67 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -1959,6 +1959,13 @@ impl<'a> ShaderFromNir<'a> { di += 1; } } + + if self.sm.sm() < 50 { + // TODO: texdepbar should be created by calc_instr_deps() and + // should be less conservative than textures_left=0. + // See the old pass: NVC0LegalizePostRA::insertTextureBarriers + b.push_op(OpTexDepBar { textures_left: 0 }); + } self.set_ssa(tex.def.as_def(), nir_dst); } diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index ef6378fa3f2..7befadd74f2 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -5816,6 +5816,25 @@ impl DisplayOp for OpBar { } impl_display_for_op!(OpBar); +/// Instruction only used on Kepler(A|B). +/// Kepler has explicit dependency tracking for texture loads. +/// When a texture load is executed, it is put on some kind of FIFO queue +/// for later execution. +/// Before the results of a texture load are used we need to wait on the queue; +/// texdepbar waits until the queue has at most `textures_left` elements. 
+#[repr(C)] +#[derive(SrcsAsSlice, DstsAsSlice)] +pub struct OpTexDepBar { + pub textures_left: i8, +} + +impl DisplayOp for OpTexDepBar { + fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "texdepbar {}", self.textures_left) + } +} +impl_display_for_op!(OpTexDepBar); + #[repr(C)] #[derive(SrcsAsSlice, DstsAsSlice)] pub struct OpCS2R { @@ -6535,6 +6554,7 @@ pub enum Op { Exit(OpExit), WarpSync(OpWarpSync), Bar(OpBar), + TexDepBar(OpTexDepBar), CS2R(OpCS2R), Isberd(OpIsberd), Kill(OpKill), @@ -6699,6 +6719,7 @@ impl Op { // Miscellaneous ops Op::Bar(_) + | Op::TexDepBar(_) | Op::CS2R(_) | Op::Isberd(_) | Op::Kill(_) @@ -7089,6 +7110,7 @@ impl Instr { | Op::Exit(_) | Op::WarpSync(_) | Op::Bar(_) + | Op::TexDepBar(_) | Op::RegOut(_) | Op::Out(_) | Op::OutFinal(_) diff --git a/src/nouveau/compiler/nak/opt_instr_sched_common.rs b/src/nouveau/compiler/nak/opt_instr_sched_common.rs index ef54bc7c38a..113243b99ee 100644 --- a/src/nouveau/compiler/nak/opt_instr_sched_common.rs +++ b/src/nouveau/compiler/nak/opt_instr_sched_common.rs @@ -193,9 +193,12 @@ pub fn side_effect_type(op: &Op) -> SideEffect { Op::Out(_) | Op::OutFinal(_) => SideEffect::Barrier, // Miscellaneous ops - Op::Bar(_) | Op::CS2R(_) | Op::Isberd(_) | Op::Kill(_) | Op::S2R(_) => { - SideEffect::Barrier - } + Op::Bar(_) + | Op::TexDepBar(_) + | Op::CS2R(_) + | Op::Isberd(_) + | Op::Kill(_) + | Op::S2R(_) => SideEffect::Barrier, Op::PixLd(_) | Op::Nop(_) | Op::Vote(_) => SideEffect::None, // Virtual ops @@ -282,6 +285,7 @@ pub fn estimate_variable_latency(sm: u8, op: &Op) -> u32 { // Miscellaneous ops Op::Bar(_) + | Op::TexDepBar(_) | Op::CS2R(_) | Op::Isberd(_) | Op::Kill(_)