diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 5cda3025a28..299d2b1c36b 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2151,6 +2151,8 @@ intrinsic("end_primitive_nv", dest_comp=1, src_comp=[1], indices=[STREAM_ID]) # Contains the final primitive handle and indicate the end of emission. intrinsic("final_primitive_nv", src_comp=[1]) +barrier("copy_fs_outputs_nv") + intrinsic("bar_set_nv", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE]) intrinsic("bar_break_nv", dest_comp=1, bit_sizes=[32], src_comp=[1]) # src[] = { bar, bar_set } diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 21f8b11f6fc..7789899f21f 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -2097,6 +2097,62 @@ impl<'a> ShaderFromNir<'a> { data: data, }); } + nir_intrinsic_copy_fs_outputs_nv => { + let ShaderIoInfo::Fragment(info) = &mut self.info.io else { + panic!( + "copy_fs_outputs_nv is only allowed in fragment shaders" + ); + }; + + for i in 0..32 { + // Assume that colors have to come a vec4 at a time + if !self.fs_out_regs[i].is_none() { + info.writes_color |= 0xf << (i & !3) + } + } + let mask_idx = (NAK_FS_OUT_SAMPLE_MASK / 4) as usize; + info.writes_sample_mask = !self.fs_out_regs[mask_idx].is_none(); + let depth_idx = (NAK_FS_OUT_DEPTH / 4) as usize; + info.writes_depth = !self.fs_out_regs[depth_idx].is_none(); + + let mut srcs = Vec::new(); + for i in 0..32 { + if info.writes_color & (1 << i) != 0 { + if self.fs_out_regs[i].is_none() { + srcs.push(0.into()); + } else { + srcs.push(self.fs_out_regs[i].into()); + } + } + } + + // These always come together for some reason + if info.writes_sample_mask || info.writes_depth { + if info.writes_sample_mask { + srcs.push(self.fs_out_regs[mask_idx].into()); + } else { + srcs.push(0.into()); + } + if info.writes_depth { + // Saturate depth writes. + // + // TODO: This seems wrong in light of unrestricted depth + // but it's needed to pass CTS tests for now. + let depth = self.fs_out_regs[depth_idx]; + let sat_depth = b.alloc_ssa(RegFile::GPR, 1); + b.push_op(OpFAdd { + dst: sat_depth.into(), + srcs: [depth.into(), 0.into()], + saturate: true, + rnd_mode: FRndMode::NearestEven, + ftz: false, + }); + srcs.push(sat_depth.into()); + } + } + + b.push_op(OpFSOut { srcs: srcs }); + } nir_intrinsic_demote | nir_intrinsic_discard | nir_intrinsic_terminate => { @@ -2842,61 +2898,6 @@ impl<'a> ShaderFromNir<'a> { self.set_ssa(&undef.def, dst); } - fn store_fs_outputs(&mut self, b: &mut impl SSABuilder) { - let ShaderIoInfo::Fragment(info) = &mut self.info.io else { - return; - }; - - for i in 0..32 { - // Assume that colors have to come a vec4 at a time - if !self.fs_out_regs[i].is_none() { - info.writes_color |= 0xf << (i & !3) - } - } - let mask_idx = (NAK_FS_OUT_SAMPLE_MASK / 4) as usize; - info.writes_sample_mask = !self.fs_out_regs[mask_idx].is_none(); - let depth_idx = (NAK_FS_OUT_DEPTH / 4) as usize; - info.writes_depth = !self.fs_out_regs[depth_idx].is_none(); - - let mut srcs = Vec::new(); - for i in 0..32 { - if info.writes_color & (1 << i) != 0 { - if self.fs_out_regs[i].is_none() { - srcs.push(0.into()); - } else { - srcs.push(self.fs_out_regs[i].into()); - } - } - } - - // These always come together for some reason - if info.writes_sample_mask || info.writes_depth { - if info.writes_sample_mask { - srcs.push(self.fs_out_regs[mask_idx].into()); - } else { - srcs.push(0.into()); - } - if info.writes_depth { - // Saturate depth writes. - // - // TODO: This seems wrong in light of unrestricted depth but - // it's needed to pass CTS tests for now. - let depth = self.fs_out_regs[depth_idx]; - let sat_depth = b.alloc_ssa(RegFile::GPR, 1); - b.push_op(OpFAdd { - dst: sat_depth.into(), - srcs: [depth.into(), 0.into()], - saturate: true, - rnd_mode: FRndMode::NearestEven, - ftz: false, - }); - srcs.push(sat_depth.into()); - } - } - - b.push_op(OpFSOut { srcs: srcs }); - } - fn parse_block( &mut self, ssa_alloc: &mut SSAValueAllocator, @@ -3040,7 +3041,6 @@ impl<'a> ShaderFromNir<'a> { assert!(succ[1].is_none()); let s0 = succ[0].unwrap(); if s0.index == self.end_block_id { - self.store_fs_outputs(&mut b); b.push_op(OpExit {}); } else { self.cfg.add_edge(nb.index, s0.index); diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index f9590c817f8..db9eacf8054 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -1073,6 +1073,14 @@ nak_nir_lower_fs_outputs(nir_shader *nir) NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out, fs_out_size, 0); + /* We need a copy_fs_outputs_nv intrinsic so NAK knows where to place the + * final copy. This needs to be in the last block, after all store_output + * intrinsics. + */ + nir_function_impl *impl = nir_shader_get_entrypoint(nir); + nir_builder b = nir_builder_at(nir_after_impl(impl)); + nir_copy_fs_outputs_nv(&b); + return true; }