From a3fcccb47bfbaf49a5d1ffa56547973462e70ab0 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 14 Oct 2024 12:16:30 -0500 Subject: [PATCH] nak/from_nir: Handle f16v2 atomics Reviewed-by: Mel Henning Part-of: --- src/nouveau/compiler/nak/from_nir.rs | 48 ++++++++++++++-------------- src/nouveau/compiler/nak/ir.rs | 10 +++--- src/nouveau/compiler/nak_nir.c | 8 +++-- 3 files changed, 35 insertions(+), 31 deletions(-) diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index d261d269223..4cca197c063 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -2105,6 +2105,7 @@ impl<'a> ShaderFromNir<'a> { fn get_atomic_type(&self, intrin: &nir_intrinsic_instr) -> AtomType { let bit_size = intrin.def.bit_size(); + let num_comps = intrin.def.num_components(); match intrin.atomic_op() { nir_atomic_op_iadd => AtomType::U(bit_size), nir_atomic_op_imin => AtomType::I(bit_size), @@ -2114,10 +2115,12 @@ impl<'a> ShaderFromNir<'a> { nir_atomic_op_iand => AtomType::U(bit_size), nir_atomic_op_ior => AtomType::U(bit_size), nir_atomic_op_ixor => AtomType::U(bit_size), - nir_atomic_op_xchg => AtomType::U(bit_size), - nir_atomic_op_fadd => AtomType::F(bit_size), - nir_atomic_op_fmin => AtomType::F(bit_size), - nir_atomic_op_fmax => AtomType::F(bit_size), + // Because no comparison is happening, it's safe to use a U32 type + // for xchg of F16v2 data. + nir_atomic_op_xchg => AtomType::U(bit_size * num_comps), + nir_atomic_op_fadd => AtomType::F(bit_size, num_comps), + nir_atomic_op_fmin => AtomType::F(bit_size, num_comps), + nir_atomic_op_fmax => AtomType::F(bit_size, num_comps), nir_atomic_op_cmpxchg => AtomType::U(bit_size), _ => panic!("Unsupported NIR atomic op"), } @@ -2618,12 +2621,9 @@ impl<'a> ShaderFromNir<'a> { let atom_type = self.get_atomic_type(intrin); let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Packed); - assert!( - intrin.def.bit_size() == 32 || intrin.def.bit_size() == 64 - ); - assert!(intrin.def.num_components() == 1); - let dst = - b.alloc_ssa_vec(RegFile::GPR, intrin.def.bit_size() / 32); + let bits = intrin.def.bit_size() * intrin.def.num_components(); + assert!(bits % 32 == 0); + let dst = b.alloc_ssa_vec(RegFile::GPR, (bits / 32) as u8); let data = if intrin.intrinsic == nir_intrinsic_bindless_image_atomic_swap @@ -3027,15 +3027,15 @@ impl<'a> ShaderFromNir<'a> { b.predicate(cond.into()).push_op(OpKill {}); } nir_intrinsic_global_atomic_nv => { - let bit_size = intrin.def.bit_size(); let addr = self.get_src(&srcs[0]); let uaddr = self.get_src(&srcs[1]); let data = self.get_src(&srcs[2]); let atom_type = self.get_atomic_type(intrin); let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate); - assert!(intrin.def.num_components() == 1); - let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32)); + let bits = intrin.def.bit_size() * intrin.def.num_components(); + assert!(bits % 32 == 0); + let dst = b.alloc_ssa_vec(RegFile::GPR, (bits / 32) as u8); let is_reduction = atom_op.is_reduction() && intrin.def.components_read() == 0; @@ -3062,14 +3062,14 @@ impl<'a> ShaderFromNir<'a> { } nir_intrinsic_global_atomic_swap_nv => { assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg); - let bit_size = intrin.def.bit_size(); let addr = self.get_src(&srcs[0]); let cmpr = self.get_src(&srcs[1]); let data = self.get_src(&srcs[2]); - let atom_type = AtomType::U(bit_size); + let atom_type = self.get_atomic_type(intrin); - assert!(intrin.def.num_components() == 1); - let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32)); + let bits = intrin.def.bit_size() * intrin.def.num_components(); + assert!(bits % 32 == 0); + let dst = b.alloc_ssa_vec(RegFile::GPR, (bits / 32) as u8); b.push_op(OpAtom { dst: dst.clone().into(), @@ -3753,15 +3753,15 @@ impl<'a> ShaderFromNir<'a> { || self.nir.info.stage() == MESA_SHADER_KERNEL ); - let bit_size = intrin.def.bit_size(); let addr = self.get_src(&srcs[0]); let uaddr = self.get_src(&srcs[1]); let data = self.get_src(&srcs[2]); let atom_type = self.get_atomic_type(intrin); let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate); - assert!(intrin.def.num_components() == 1); - let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32)); + let bits = intrin.def.bit_size() * intrin.def.num_components(); + assert!(bits % 32 == 0); + let dst = b.alloc_ssa_vec(RegFile::GPR, (bits / 32) as u8); b.push_op(OpAtom { dst: dst.clone().into(), @@ -3786,14 +3786,14 @@ impl<'a> ShaderFromNir<'a> { ); assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg); - let bit_size = intrin.def.bit_size(); let addr = self.get_src(&srcs[0]); let cmpr = self.get_src(&srcs[1]); let data = self.get_src(&srcs[2]); - let atom_type = AtomType::U(bit_size); + let atom_type = self.get_atomic_type(intrin); - assert!(intrin.def.num_components() == 1); - let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32)); + let bits = intrin.def.bit_size() * intrin.def.num_components(); + assert!(bits % 32 == 0); + let dst = b.alloc_ssa_vec(RegFile::GPR, (bits / 32) as u8); b.push_op(OpAtom { dst: dst.clone().into(), diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index f9247b3b2f4..ce5bd268f81 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -2623,11 +2623,11 @@ pub enum AtomType { } impl AtomType { - pub fn F(bits: u8) -> AtomType { - match bits { - 16 => panic!("16-bit float atomics not yet supported"), - 32 => AtomType::F32, - 64 => AtomType::F64, + pub fn F(bits: u8, comps: u8) -> AtomType { + match (bits, comps) { + (16, 2) => AtomType::F16v2, + (32, 1) => AtomType::F32, + (64, 1) => AtomType::F64, _ => panic!("Invalid float atomic type"), } } diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index 47ef6219033..c30c2b63fae 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -1167,12 +1167,16 @@ type_size_vec4(const struct glsl_type *type, bool bindless) static bool atomic_supported(const nir_instr *instr, const void *data) { - /* Shared atomics don't support 64-bit arithmetic */ + /* Shared atomics don't support and need lowering for: + * - 64-bit arithmetic + * - float32 adds + * - f16vec2 */ const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); nir_atomic_op atomic_op = nir_intrinsic_atomic_op(intr); return !(intr->intrinsic == nir_intrinsic_shared_atomic && (intr->def.bit_size == 64 || - (intr->def.bit_size == 32 && atomic_op == nir_atomic_op_fadd))); + (intr->def.bit_size == 32 && atomic_op == nir_atomic_op_fadd) || + (intr->def.bit_size == 16 && intr->def.num_components == 2))); } static unsigned