From f85069ed57039b66dde7eef2bd9af8dea3d35eff Mon Sep 17 00:00:00 2001
From: Faith Ekstrand
Date: Tue, 3 Oct 2023 13:19:07 -0500
Subject: [PATCH] nak: Use nak_nir_lower_vtg_io

This massively simplifies nak_from_nir.rs because it lets us do all
the annoying NIR fiddling in NIR and not in the back-end.

Part-of:
---
 src/nouveau/compiler/nak_encode_sm75.rs |  13 ++
 src/nouveau/compiler/nak_from_nir.rs    | 295 +++++++++++-------------
 src/nouveau/compiler/nak_ir.rs          |  30 +++
 src/nouveau/compiler/nak_nir.c          |  47 +---
 4 files changed, 184 insertions(+), 201 deletions(-)

diff --git a/src/nouveau/compiler/nak_encode_sm75.rs b/src/nouveau/compiler/nak_encode_sm75.rs
index 264fe8dcfbe..2f70d196f9a 100644
--- a/src/nouveau/compiler/nak_encode_sm75.rs
+++ b/src/nouveau/compiler/nak_encode_sm75.rs
@@ -1478,6 +1478,18 @@ impl SM75Instr {
         }
     }
 
+    fn encode_al2p(&mut self, op: &OpAL2P) {
+        self.set_opcode(0x920);
+
+        self.set_dst(op.dst);
+        self.set_reg_src(24..32, op.offset);
+
+        self.set_field(40..50, op.access.addr);
+        self.set_field(74..76, 0_u8); // comps
+        assert!(!op.access.patch);
+        self.set_bit(79, op.access.output);
+    }
+
     fn encode_ald(&mut self, op: &OpALd) {
         self.set_opcode(0x321);
 
@@ -1774,6 +1786,7 @@ impl SM75Instr {
             Op::St(op) => si.encode_st(&op),
             Op::Atom(op) => si.encode_atom(&op),
             Op::AtomCas(op) => si.encode_atom_cas(&op),
+            Op::AL2P(op) => si.encode_al2p(&op),
             Op::ALd(op) => si.encode_ald(&op),
             Op::ASt(op) => si.encode_ast(&op),
             Op::Ipa(op) => si.encode_ipa(&op),
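
For reference, encode_al2p() above is pure bit-packing: opcode 0x920, the
attribute address in bits 40..50, and the output flag at bit 79 of the
128-bit instruction word. The sketch below shows the general shape of such
set_field()/set_bit() helpers. EncodeSketch, its [u32; 4] layout, and the
12-bit opcode field position are assumptions for illustration, not NAK's
actual definitions.

    use std::ops::Range;

    // A 128-bit SM75-style instruction word, stored as four dwords.
    struct EncodeSketch {
        inst: [u32; 4],
    }

    impl EncodeSketch {
        // OR `val` into the bit range `range` of the 128-bit word.
        fn set_field(&mut self, range: Range<usize>, val: u64) {
            let bits = range.end - range.start;
            assert!(bits == 64 || val < (1u64 << bits), "value too wide");
            for (i, bit) in range.enumerate() {
                if (val >> i) & 1 != 0 {
                    self.inst[bit / 32] |= 1 << (bit % 32);
                }
            }
        }

        fn set_bit(&mut self, bit: usize, b: bool) {
            self.set_field(bit..bit + 1, u64::from(b));
        }
    }

    fn main() {
        // Mirrors encode_al2p(): opcode 0x920, attribute address in bits
        // 40..50, output flag at bit 79. The opcode field position (0..12)
        // is a placeholder, not the real SM75 opcode field.
        let mut e = EncodeSketch { inst: [0; 4] };
        e.set_field(0..12, 0x920);
        e.set_field(40..50, 0x84); // access.addr
        e.set_bit(79, true); // access.output
        println!("{:08x?}", e.inst);
    }
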
diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs
index d45efa13013..3d5d53e41c5 100644
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -1200,6 +1200,105 @@ impl<'a> ShaderFromNir<'a> {
     ) {
         let srcs = intrin.srcs_as_slice();
         match intrin.intrinsic {
+            nir_intrinsic_al2p_nv => {
+                let offset = self.get_src(&srcs[0]);
+                let addr = u16::try_from(intrin.base()).unwrap();
+
+                let flags = intrin.flags();
+                let flags: nak_nir_attr_io_flags =
+                    unsafe { std::mem::transmute_copy(&flags) };
+
+                let access = AttrAccess {
+                    addr: addr,
+                    comps: 1,
+                    patch: flags.patch(),
+                    output: flags.output(),
+                    flags: 0,
+                };
+
+                let dst = b.alloc_ssa(RegFile::GPR, 1);
+                b.push_op(OpAL2P {
+                    dst: dst.into(),
+                    offset: offset,
+                    access: access,
+                });
+                self.set_dst(&intrin.def, dst);
+            }
+            nir_intrinsic_ald_nv | nir_intrinsic_ast_nv => {
+                let addr = u16::try_from(intrin.base()).unwrap();
+                let base = u16::try_from(intrin.range_base()).unwrap();
+                let range = u16::try_from(intrin.range()).unwrap();
+                let range = base..(base + range);
+
+                let flags = intrin.flags();
+                let flags: nak_nir_attr_io_flags =
+                    unsafe { std::mem::transmute_copy(&flags) };
+                assert!(!flags.patch() || !flags.phys());
+
+                if let ShaderIoInfo::Vtg(io) = &mut self.info.io {
+                    if flags.patch() {
+                        match &mut self.info.stage {
+                            ShaderStageInfo::TessellationInit(stage) => {
+                                assert!(flags.output());
+                                stage.per_patch_attribute_count = max(
+                                    stage.per_patch_attribute_count,
+                                    (range.end / 4).try_into().unwrap(),
+                                );
+                            }
+                            ShaderStageInfo::Tessellation => (),
+                            _ => panic!("Patch I/O not supported"),
+                        }
+                    } else {
+                        if flags.output() {
+                            if intrin.intrinsic == nir_intrinsic_ast_nv {
+                                io.mark_store_req(range.clone());
+                            }
+                            io.mark_attrs_written(range);
+                        } else {
+                            io.mark_attrs_read(range);
+                        }
+                    }
+                } else {
+                    panic!("Must be a VTG stage");
+                }
+
+                let access = AttrAccess {
+                    addr: addr,
+                    comps: intrin.num_components,
+                    patch: flags.patch(),
+                    output: flags.output(),
+                    flags: flags.phys().into(),
+                };
+
+                if intrin.intrinsic == nir_intrinsic_ald_nv {
+                    let vtx = self.get_src(&srcs[0]);
+                    let offset = self.get_src(&srcs[1]);
+
+                    assert!(intrin.def.bit_size() == 32);
+                    let dst = b.alloc_ssa(RegFile::GPR, access.comps);
+                    b.push_op(OpALd {
+                        dst: dst.into(),
+                        vtx: vtx,
+                        offset: offset,
+                        access: access,
+                    });
+                    self.set_dst(&intrin.def, dst);
+                } else if intrin.intrinsic == nir_intrinsic_ast_nv {
+                    assert!(srcs[0].bit_size() == 32);
+                    let data = self.get_src(&srcs[0]);
+                    let vtx = self.get_src(&srcs[1]);
+                    let offset = self.get_src(&srcs[2]);
+
+                    b.push_op(OpASt {
+                        data: data,
+                        vtx: vtx,
+                        offset: offset,
+                        access: access,
+                    });
+                } else {
+                    panic!("Invalid VTG I/O intrinsic");
+                }
+            }
             nir_intrinsic_bindless_image_atomic => {
                 let handle = self.get_src(&srcs[0]);
                 let dim = self.get_image_dim(intrin);
@@ -1366,173 +1465,33 @@ impl<'a> ShaderFromNir<'a> {
                 });
                 self.set_dst(&intrin.def, dst);
             }
-            nir_intrinsic_load_input
-            | nir_intrinsic_load_output
-            | nir_intrinsic_load_per_vertex_input
-            | nir_intrinsic_load_per_vertex_output
-            | nir_intrinsic_store_output
-            | nir_intrinsic_store_per_vertex_output => {
-                let comps = intrin.num_components;
-
-                let store_data = match intrin.intrinsic {
-                    nir_intrinsic_load_input
-                    | nir_intrinsic_load_output
-                    | nir_intrinsic_load_per_vertex_input
-                    | nir_intrinsic_load_per_vertex_output => {
-                        assert!(intrin.def.bit_size() == 32);
-                        assert!(intrin.def.num_components() == comps);
-                        None
-                    }
-                    nir_intrinsic_store_output
-                    | nir_intrinsic_store_per_vertex_output => {
-                        assert!(srcs[0].bit_size() == 32);
-                        assert!(srcs[0].num_components() == comps);
-                        Some(self.get_src(&srcs[0]))
-                    }
-                    _ => panic!("Unhandled intrinsic"),
+            nir_intrinsic_load_input => {
+                let ShaderIoInfo::Fragment(io) = &mut self.info.io else {
+                    panic!("load_input is only used for fragment shaders");
                 };
 
-                let (vtx, offset, offset_as_u32) = match intrin.intrinsic {
-                    nir_intrinsic_load_input | nir_intrinsic_load_output => (
-                        Src::new_zero(),
-                        self.get_src(&srcs[0]),
-                        srcs[0].as_uint(),
-                    ),
-                    nir_intrinsic_load_per_vertex_input
-                    | nir_intrinsic_load_per_vertex_output => (
-                        self.get_src(&srcs[0]),
-                        self.get_src(&srcs[1]),
-                        srcs[1].as_uint(),
-                    ),
-                    nir_intrinsic_store_output => (
-                        Src::new_zero(),
-                        self.get_src(&srcs[1]),
-                        srcs[1].as_uint(),
-                    ),
-                    nir_intrinsic_store_per_vertex_output => (
-                        self.get_src(&srcs[1]),
-                        self.get_src(&srcs[2]),
-                        srcs[2].as_uint(),
-                    ),
-                    _ => panic!("Unhandled intrinsic"),
-                };
+                assert!(intrin.def.bit_size() == 32);
+                let comps = intrin.def.num_components;
 
-                let base = u16::try_from(intrin.base()).unwrap();
-                let range = u16::try_from(intrin.range()).unwrap();
-                let comp = u16::try_from(intrin.component()).unwrap();
+                let addr = u16::try_from(intrin.base()).unwrap()
+                    + u16::try_from(srcs[0].as_uint().unwrap()).unwrap()
+                    + 4 * u16::try_from(intrin.component()).unwrap();
 
-                let (range, addr, offset) = match offset_as_u32 {
-                    Some(imm) => {
-                        let imm = u16::try_from(imm).unwrap();
-                        let addr = base + imm + 4 * comp;
-                        let range = addr..(addr + 4 * u16::from(comps));
-                        (range, addr, Src::new_zero())
-                    }
-                    None => {
-                        let range = base..(base + range);
-                        (range, base + 4 * comp, offset)
-                    }
-                };
+                let dst = b.alloc_ssa(RegFile::GPR, comps);
+                for c in 0..comps {
+                    let c_addr = addr + 4 * u16::from(c);
 
-                let stage = self.nir.info.stage();
-                let (output, patch) = match intrin.intrinsic {
-                    nir_intrinsic_load_input => {
-                        (false, stage == MESA_SHADER_TESS_EVAL)
-                    }
-                    nir_intrinsic_load_output | nir_intrinsic_store_output => {
-                        (true, stage == MESA_SHADER_TESS_CTRL)
-                    }
-                    nir_intrinsic_load_per_vertex_input => (false, false),
-                    nir_intrinsic_load_per_vertex_output
-                    | nir_intrinsic_store_per_vertex_output => (true, false),
-                    _ => panic!("Unhandled intrinsic"),
-                };
+                    io.mark_attr_read(c_addr, PixelImap::Constant);
 
-                match &mut self.info.io {
-                    ShaderIoInfo::None => {
-                        panic!("Stage does not support load_input")
-                    }
-                    ShaderIoInfo::Fragment(io) => {
-                        if let Some(data) = store_data {
-                            // We assume these only ever happen in the
-                            // last block.  This is ensured by
-                            // nir_lower_io_to_temporaries()
-                            assert!(offset_as_u32 == Some(0));
-                            assert!(addr % 4 == 0);
-                            let data = data.as_ssa().unwrap();
-                            for c in 0..usize::from(comps) {
-                                let idx =
-                                    usize::from(addr / 4) + usize::from(c);
-                                self.fs_out_regs[idx] = data[c];
-                            }
-                        } else {
-                            let dst = b.alloc_ssa(RegFile::GPR, comps);
-                            for c in 0..comps {
-                                let c_addr = addr + 4 * u16::from(c);
-
-                                io.mark_attr_read(c_addr, PixelImap::Constant);
-
-                                b.push_op(OpIpa {
-                                    dst: dst[usize::from(c)].into(),
-                                    addr: c_addr,
-                                    freq: InterpFreq::Constant,
-                                    loc: InterpLoc::Default,
-                                    offset: SrcRef::Zero.into(),
-                                });
-                            }
-                            self.set_dst(&intrin.def, dst);
-                        }
-                    }
-                    ShaderIoInfo::Vtg(io) => {
-                        if patch {
-                            match &mut self.info.stage {
-                                ShaderStageInfo::TessellationInit(stage) => {
-                                    stage.per_patch_attribute_count = max(
-                                        stage.per_patch_attribute_count,
-                                        (range.end / 4).try_into().unwrap(),
-                                    );
-                                }
-                                ShaderStageInfo::Tessellation => (),
-                                _ => panic!("Patch I/O not supported"),
-                            }
-                        } else {
-                            if output {
-                                if store_data.is_none() {
-                                    io.mark_store_req(range.clone());
-                                }
-                                io.mark_attrs_written(range);
-                            } else {
-                                io.mark_attrs_read(range);
-                            }
-                        }
-
-                        let access = AttrAccess {
-                            addr: addr,
-                            comps: comps,
-                            patch: patch,
-                            output: output,
-                            flags: 0,
-                        };
-
-                        if let Some(data) = store_data {
-                            b.push_op(OpASt {
-                                vtx: vtx,
-                                offset: offset,
-                                data: data,
-                                access: access,
-                            });
-                        } else {
-                            let dst = b.alloc_ssa(RegFile::GPR, comps);
-                            b.push_op(OpALd {
-                                dst: dst.into(),
-                                vtx: vtx,
-                                offset: offset,
-                                access: access,
-                            });
-                            self.set_dst(&intrin.def, dst);
-                        }
-                    }
+                    b.push_op(OpIpa {
+                        dst: dst[usize::from(c)].into(),
+                        addr: c_addr,
+                        freq: InterpFreq::Constant,
+                        loc: InterpLoc::Default,
+                        offset: SrcRef::Zero.into(),
+                    });
                 }
+                self.set_dst(&intrin.def, dst);
             }
             nir_intrinsic_load_interpolated_input => {
                 let bary =
@@ -1851,6 +1810,22 @@ impl<'a> ShaderFromNir<'a> {
                     access: access,
                 });
             }
+            nir_intrinsic_store_output => {
+                let ShaderIoInfo::Fragment(io) = &mut self.info.io else {
+                    panic!("store_output is only used for fragment shaders");
+                };
+                let data = self.get_src(&srcs[0]);
+
+                let addr = u16::try_from(intrin.base()).unwrap()
+                    + u16::try_from(srcs[1].as_uint().unwrap()).unwrap()
+                    + 4 * u16::try_from(intrin.component()).unwrap();
+                assert!(addr % 4 == 0);
+
+                for c in 0..usize::from(intrin.num_components) {
+                    let idx = usize::from(addr / 4) + usize::from(c);
+                    self.fs_out_regs[idx] = data.as_ssa().unwrap()[c];
+                }
+            }
             nir_intrinsic_store_scratch => {
                 let data = self.get_src(&srcs[0]);
                 let size_B =
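
The al2p_nv and ald_nv/ast_nv arms above recover nak_nir_attr_io_flags from
the intrinsic's 32-bit flags() value with an unsafe transmute_copy. A rough
sketch of that decoding pattern follows; the bit layout of AttrIoFlagsSketch
is invented for illustration, while the real layout is whatever the shared C
header defines for nak_nir_attr_io_flags.

    // Invented layout for illustration; the real nak_nir_attr_io_flags is
    // the C bitfield struct shared with the NIR lowering pass.
    #[repr(C)]
    #[derive(Clone, Copy)]
    struct AttrIoFlagsSketch(u32);

    impl AttrIoFlagsSketch {
        fn output(&self) -> bool { self.0 & (1 << 0) != 0 }
        fn patch(&self)  -> bool { self.0 & (1 << 1) != 0 }
        fn phys(&self)   -> bool { self.0 & (1 << 2) != 0 }
    }

    fn decode_flags(raw: u32) -> AttrIoFlagsSketch {
        // Sound only because both sides are 32 bits and #[repr(C)]; this
        // is the invariant the transmute_copy in the back-end relies on.
        unsafe { std::mem::transmute_copy(&raw) }
    }

    fn main() {
        let flags = decode_flags(0b011);
        assert!(flags.output() && flags.patch() && !flags.phys());
    }
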
diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs
index ddd9d82cdbc..50c658ae961 100644
--- a/src/nouveau/compiler/nak_ir.rs
+++ b/src/nouveau/compiler/nak_ir.rs
@@ -3313,6 +3313,34 @@ impl fmt::Display for OpAtomCas {
     }
 }
 
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpAL2P {
+    pub dst: Dst,
+
+    #[src_type(GPR)]
+    pub offset: Src,
+
+    pub access: AttrAccess,
+}
+
+impl fmt::Display for OpAL2P {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "AL2P")?;
+        if self.access.output {
+            write!(f, ".O")?;
+        }
+        if self.access.patch {
+            write!(f, ".P")?;
+        }
+        write!(f, " {} a[{:#x}", self.dst, self.access.addr)?;
+        if !self.offset.is_zero() {
+            write!(f, "+{}", self.offset)?;
+        }
+        write!(f, "]")
+    }
+}
+
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpALd {
@@ -4019,6 +4047,7 @@ pub enum Op {
     St(OpSt),
     Atom(OpAtom),
     AtomCas(OpAtomCas),
+    AL2P(OpAL2P),
     ALd(OpALd),
     ASt(OpASt),
     Ipa(OpIpa),
@@ -4446,6 +4475,7 @@ impl Instr {
             | Op::St(_)
             | Op::Atom(_)
            | Op::AtomCas(_)
+            | Op::AL2P(_)
             | Op::ALd(_)
             | Op::ASt(_)
             | Op::Ipa(_)
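
For reference, the Display impl above prints an assembly-like form for
OpAL2P. A standalone mimic follows, using string stand-ins for NAK's
Dst/Src types; the register names are hypothetical.

    use std::fmt;

    struct Al2pSketch {
        dst: &'static str,
        offset: Option<&'static str>, // None when the offset Src is zero
        addr: u16,
        output: bool,
        patch: bool,
    }

    impl fmt::Display for Al2pSketch {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            write!(f, "AL2P")?;
            if self.output {
                write!(f, ".O")?;
            }
            if self.patch {
                write!(f, ".P")?;
            }
            write!(f, " {} a[{:#x}", self.dst, self.addr)?;
            if let Some(off) = self.offset {
                write!(f, "+{}", off)?;
            }
            write!(f, "]")
        }
    }

    fn main() {
        let op = Al2pSketch {
            dst: "r0",
            offset: Some("r2"),
            addr: 0x84,
            output: true,
            patch: false,
        };
        assert_eq!(op.to_string(), "AL2P.O r0 a[0x84+r2]");
    }
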
diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c
index b4a00d19c57..facc7fb4473 100644
--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@@ -304,18 +304,6 @@ nak_sysval_sysval_idx(gl_system_value sysval)
    }
 }
 
-static nir_def *
-nak_nir_isberd(nir_builder *b, nir_def *vertex)
-{
-   nir_def *info = nir_load_sysval_nv(b, 32, .base = NAK_SV_INVOCATION_INFO,
-                                      .access = ACCESS_CAN_REORDER);
-   nir_def *lo = nir_extract_u8_imm(b, info, 0);
-   nir_def *hi = nir_extract_u8_imm(b, info, 2);
-   nir_def *idx = nir_iadd(b, nir_imul(b, lo, hi), vertex);
-
-   return nir_isberd_nv(b, idx);
-}
-
 static bool
 nak_nir_lower_system_value_instr(nir_builder *b, nir_instr *instr, void *data)
 {
@@ -338,8 +326,8 @@ nak_nir_lower_system_value_instr(nir_builder *b, nir_instr *instr, void *data)
    case nir_intrinsic_load_primitive_id: {
       assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
              b->shader->info.stage == MESA_SHADER_TESS_EVAL);
-      nir_def *idx = nak_nir_isberd(b, nir_imm_int(b, 0));
-      val = nir_load_per_vertex_input(b, 1, 32, idx, nir_imm_int(b, 0),
+      val = nir_load_per_vertex_input(b, 1, 32, nir_imm_int(b, 0),
+                                      nir_imm_int(b, 0),
                                       .base = NAK_ATTR_PRIMITIVE_ID,
                                       .dest_type = nir_type_int32);
       break;
@@ -423,23 +411,6 @@ nak_nir_lower_system_values(nir_shader *nir)
                                         NULL);
 }
 
-static bool
-lower_per_vertex_io_intrin(nir_builder *b,
-                           nir_intrinsic_instr *intrin,
-                           void *data)
-{
-   if (intrin->intrinsic != nir_intrinsic_load_per_vertex_input)
-      return false;
-
-   b->cursor = nir_before_instr(&intrin->instr);
-
-   nir_src *vertex = &intrin->src[0];
-   nir_def *idx = nak_nir_isberd(b, vertex->ssa);
-   nir_src_rewrite(vertex, idx);
-
-   return true;
-}
-
 static bool
 nak_nir_lower_varyings(nir_shader *nir, nir_variable_mode modes)
 {
@@ -452,16 +423,6 @@ nak_nir_lower_varyings(nir_shader *nir, nir_variable_mode modes)
 
    OPT(nir, nir_lower_io, modes, type_size_vec4_bytes, 0);
 
-   switch (nir->info.stage) {
-   case MESA_SHADER_TESS_CTRL:
-   case MESA_SHADER_TESS_EVAL:
-   case MESA_SHADER_GEOMETRY:
-      OPT(nir, nir_shader_intrinsics_pass, lower_per_vertex_io_intrin,
-          nir_metadata_block_index | nir_metadata_dominance, NULL);
-   default:
-      break;
-   }
-
    return progress;
 }
 
@@ -782,12 +743,16 @@ nak_postprocess_nir(nir_shader *nir,
    case MESA_SHADER_VERTEX:
       OPT(nir, nak_nir_lower_vs_inputs);
      OPT(nir, nak_nir_lower_varyings, nir_var_shader_out);
+      OPT(nir, nir_opt_constant_folding);
+      OPT(nir, nak_nir_lower_vtg_io, nak);
       break;
    case MESA_SHADER_TESS_CTRL:
    case MESA_SHADER_TESS_EVAL:
    case MESA_SHADER_GEOMETRY:
       OPT(nir, nak_nir_lower_varyings, nir_var_shader_in | nir_var_shader_out);
+      OPT(nir, nir_opt_constant_folding);
+      OPT(nir, nak_nir_lower_vtg_io, nak);
       break;
    case MESA_SHADER_FRAGMENT:
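
Note that nir_opt_constant_folding now runs immediately before
nak_nir_lower_vtg_io in both VTG cases above. The fragment-shader paths in
nak_from_nir.rs likewise unwrap as_uint() on the NIR offset source, so they
rely on I/O offsets having been folded to constants by this point. A rough
sketch of that contract, with a made-up SrcSketch type standing in for a NIR
scalar source:

    // Made-up stand-in for a scalar source; only the constant case
    // matters here.
    #[derive(Clone, Copy)]
    enum SrcSketch {
        Imm(u32),
        Gpr(u8),
    }

    impl SrcSketch {
        fn as_uint(self) -> Option<u32> {
            match self {
                SrcSketch::Imm(v) => Some(v),
                SrcSketch::Gpr(_) => None,
            }
        }
    }

    // Mirrors the addr computation in the fragment load_input and
    // store_output arms: base + constant offset + 4 bytes per component.
    fn fs_attr_addr(base: u16, offset: SrcSketch, component: u16) -> u16 {
        let off = offset.as_uint().expect("offset must be constant-folded");
        base + u16::try_from(off).unwrap() + 4 * component
    }

    fn main() {
        assert_eq!(fs_attr_addr(0x80, SrcSketch::Imm(16), 1), 0x94);
    }
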