nak/from_nir: Handle f16v2 atomics

Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37888>
This commit is contained in:
Faith Ekstrand 2024-10-14 12:16:30 -05:00 committed by Marge Bot
parent b1fe47e944
commit a3fcccb47b
3 changed files with 35 additions and 31 deletions

View file

@ -2105,6 +2105,7 @@ impl<'a> ShaderFromNir<'a> {
fn get_atomic_type(&self, intrin: &nir_intrinsic_instr) -> AtomType {
let bit_size = intrin.def.bit_size();
let num_comps = intrin.def.num_components();
match intrin.atomic_op() {
nir_atomic_op_iadd => AtomType::U(bit_size),
nir_atomic_op_imin => AtomType::I(bit_size),
@ -2114,10 +2115,12 @@ impl<'a> ShaderFromNir<'a> {
nir_atomic_op_iand => AtomType::U(bit_size),
nir_atomic_op_ior => AtomType::U(bit_size),
nir_atomic_op_ixor => AtomType::U(bit_size),
nir_atomic_op_xchg => AtomType::U(bit_size),
nir_atomic_op_fadd => AtomType::F(bit_size),
nir_atomic_op_fmin => AtomType::F(bit_size),
nir_atomic_op_fmax => AtomType::F(bit_size),
// Because no comparison is happening, it's safe to use a U32 type
// for xchg of F16v2 data.
nir_atomic_op_xchg => AtomType::U(bit_size * num_comps),
nir_atomic_op_fadd => AtomType::F(bit_size, num_comps),
nir_atomic_op_fmin => AtomType::F(bit_size, num_comps),
nir_atomic_op_fmax => AtomType::F(bit_size, num_comps),
nir_atomic_op_cmpxchg => AtomType::U(bit_size),
_ => panic!("Unsupported NIR atomic op"),
}
@ -2618,12 +2621,9 @@ impl<'a> ShaderFromNir<'a> {
let atom_type = self.get_atomic_type(intrin);
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Packed);
assert!(
intrin.def.bit_size() == 32 || intrin.def.bit_size() == 64
);
assert!(intrin.def.num_components() == 1);
let dst =
b.alloc_ssa_vec(RegFile::GPR, intrin.def.bit_size() / 32);
let bits = intrin.def.bit_size() * intrin.def.num_components();
assert!(bits % 32 == 0);
let dst = b.alloc_ssa_vec(RegFile::GPR, (bits / 32) as u8);
let data = if intrin.intrinsic
== nir_intrinsic_bindless_image_atomic_swap
@ -3027,15 +3027,15 @@ impl<'a> ShaderFromNir<'a> {
b.predicate(cond.into()).push_op(OpKill {});
}
nir_intrinsic_global_atomic_nv => {
let bit_size = intrin.def.bit_size();
let addr = self.get_src(&srcs[0]);
let uaddr = self.get_src(&srcs[1]);
let data = self.get_src(&srcs[2]);
let atom_type = self.get_atomic_type(intrin);
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
assert!(intrin.def.num_components() == 1);
let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32));
let bits = intrin.def.bit_size() * intrin.def.num_components();
assert!(bits % 32 == 0);
let dst = b.alloc_ssa_vec(RegFile::GPR, (bits / 32) as u8);
let is_reduction =
atom_op.is_reduction() && intrin.def.components_read() == 0;
@ -3062,14 +3062,14 @@ impl<'a> ShaderFromNir<'a> {
}
nir_intrinsic_global_atomic_swap_nv => {
assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
let bit_size = intrin.def.bit_size();
let addr = self.get_src(&srcs[0]);
let cmpr = self.get_src(&srcs[1]);
let data = self.get_src(&srcs[2]);
let atom_type = AtomType::U(bit_size);
let atom_type = self.get_atomic_type(intrin);
assert!(intrin.def.num_components() == 1);
let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32));
let bits = intrin.def.bit_size() * intrin.def.num_components();
assert!(bits % 32 == 0);
let dst = b.alloc_ssa_vec(RegFile::GPR, (bits / 32) as u8);
b.push_op(OpAtom {
dst: dst.clone().into(),
@ -3753,15 +3753,15 @@ impl<'a> ShaderFromNir<'a> {
|| self.nir.info.stage() == MESA_SHADER_KERNEL
);
let bit_size = intrin.def.bit_size();
let addr = self.get_src(&srcs[0]);
let uaddr = self.get_src(&srcs[1]);
let data = self.get_src(&srcs[2]);
let atom_type = self.get_atomic_type(intrin);
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
assert!(intrin.def.num_components() == 1);
let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32));
let bits = intrin.def.bit_size() * intrin.def.num_components();
assert!(bits % 32 == 0);
let dst = b.alloc_ssa_vec(RegFile::GPR, (bits / 32) as u8);
b.push_op(OpAtom {
dst: dst.clone().into(),
@ -3786,14 +3786,14 @@ impl<'a> ShaderFromNir<'a> {
);
assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
let bit_size = intrin.def.bit_size();
let addr = self.get_src(&srcs[0]);
let cmpr = self.get_src(&srcs[1]);
let data = self.get_src(&srcs[2]);
let atom_type = AtomType::U(bit_size);
let atom_type = self.get_atomic_type(intrin);
assert!(intrin.def.num_components() == 1);
let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32));
let bits = intrin.def.bit_size() * intrin.def.num_components();
assert!(bits % 32 == 0);
let dst = b.alloc_ssa_vec(RegFile::GPR, (bits / 32) as u8);
b.push_op(OpAtom {
dst: dst.clone().into(),

View file

@ -2623,11 +2623,11 @@ pub enum AtomType {
}
impl AtomType {
pub fn F(bits: u8) -> AtomType {
match bits {
16 => panic!("16-bit float atomics not yet supported"),
32 => AtomType::F32,
64 => AtomType::F64,
pub fn F(bits: u8, comps: u8) -> AtomType {
match (bits, comps) {
(16, 2) => AtomType::F16v2,
(32, 1) => AtomType::F32,
(64, 1) => AtomType::F64,
_ => panic!("Invalid float atomic type"),
}
}

View file

@ -1167,12 +1167,16 @@ type_size_vec4(const struct glsl_type *type, bool bindless)
static bool
atomic_supported(const nir_instr *instr, const void *data)
{
/* Shared atomics don't support 64-bit arithmetic */
/* Shared atomics don't support the following cases, which need lowering:
* - 64-bit arithmetic
* - float32 adds
* - f16vec2 */
const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
nir_atomic_op atomic_op = nir_intrinsic_atomic_op(intr);
return !(intr->intrinsic == nir_intrinsic_shared_atomic &&
(intr->def.bit_size == 64 ||
(intr->def.bit_size == 32 && atomic_op == nir_atomic_op_fadd)));
(intr->def.bit_size == 32 && atomic_op == nir_atomic_op_fadd) ||
(intr->def.bit_size == 16 && intr->def.num_components == 2)));
}
static unsigned