mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-16 08:58:30 +02:00
nak/from_nir: Handle f16v2 atomics
Reviewed-by: Mel Henning <mhenning@darkrefraction.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37888>
This commit is contained in:
parent
b1fe47e944
commit
a3fcccb47b
3 changed files with 35 additions and 31 deletions
|
|
@ -2105,6 +2105,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
|
||||
fn get_atomic_type(&self, intrin: &nir_intrinsic_instr) -> AtomType {
|
||||
let bit_size = intrin.def.bit_size();
|
||||
let num_comps = intrin.def.num_components();
|
||||
match intrin.atomic_op() {
|
||||
nir_atomic_op_iadd => AtomType::U(bit_size),
|
||||
nir_atomic_op_imin => AtomType::I(bit_size),
|
||||
|
|
@ -2114,10 +2115,12 @@ impl<'a> ShaderFromNir<'a> {
|
|||
nir_atomic_op_iand => AtomType::U(bit_size),
|
||||
nir_atomic_op_ior => AtomType::U(bit_size),
|
||||
nir_atomic_op_ixor => AtomType::U(bit_size),
|
||||
nir_atomic_op_xchg => AtomType::U(bit_size),
|
||||
nir_atomic_op_fadd => AtomType::F(bit_size),
|
||||
nir_atomic_op_fmin => AtomType::F(bit_size),
|
||||
nir_atomic_op_fmax => AtomType::F(bit_size),
|
||||
// Because no comparison is happening, it's safe to use a U32 type
|
||||
// for xchg of F16v2 data.
|
||||
nir_atomic_op_xchg => AtomType::U(bit_size * num_comps),
|
||||
nir_atomic_op_fadd => AtomType::F(bit_size, num_comps),
|
||||
nir_atomic_op_fmin => AtomType::F(bit_size, num_comps),
|
||||
nir_atomic_op_fmax => AtomType::F(bit_size, num_comps),
|
||||
nir_atomic_op_cmpxchg => AtomType::U(bit_size),
|
||||
_ => panic!("Unsupported NIR atomic op"),
|
||||
}
|
||||
|
|
@ -2618,12 +2621,9 @@ impl<'a> ShaderFromNir<'a> {
|
|||
let atom_type = self.get_atomic_type(intrin);
|
||||
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Packed);
|
||||
|
||||
assert!(
|
||||
intrin.def.bit_size() == 32 || intrin.def.bit_size() == 64
|
||||
);
|
||||
assert!(intrin.def.num_components() == 1);
|
||||
let dst =
|
||||
b.alloc_ssa_vec(RegFile::GPR, intrin.def.bit_size() / 32);
|
||||
let bits = intrin.def.bit_size() * intrin.def.num_components();
|
||||
assert!(bits % 32 == 0);
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, (bits / 32) as u8);
|
||||
|
||||
let data = if intrin.intrinsic
|
||||
== nir_intrinsic_bindless_image_atomic_swap
|
||||
|
|
@ -3027,15 +3027,15 @@ impl<'a> ShaderFromNir<'a> {
|
|||
b.predicate(cond.into()).push_op(OpKill {});
|
||||
}
|
||||
nir_intrinsic_global_atomic_nv => {
|
||||
let bit_size = intrin.def.bit_size();
|
||||
let addr = self.get_src(&srcs[0]);
|
||||
let uaddr = self.get_src(&srcs[1]);
|
||||
let data = self.get_src(&srcs[2]);
|
||||
let atom_type = self.get_atomic_type(intrin);
|
||||
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
|
||||
|
||||
assert!(intrin.def.num_components() == 1);
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32));
|
||||
let bits = intrin.def.bit_size() * intrin.def.num_components();
|
||||
assert!(bits % 32 == 0);
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, (bits / 32) as u8);
|
||||
|
||||
let is_reduction =
|
||||
atom_op.is_reduction() && intrin.def.components_read() == 0;
|
||||
|
|
@ -3062,14 +3062,14 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
nir_intrinsic_global_atomic_swap_nv => {
|
||||
assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
|
||||
let bit_size = intrin.def.bit_size();
|
||||
let addr = self.get_src(&srcs[0]);
|
||||
let cmpr = self.get_src(&srcs[1]);
|
||||
let data = self.get_src(&srcs[2]);
|
||||
let atom_type = AtomType::U(bit_size);
|
||||
let atom_type = self.get_atomic_type(intrin);
|
||||
|
||||
assert!(intrin.def.num_components() == 1);
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32));
|
||||
let bits = intrin.def.bit_size() * intrin.def.num_components();
|
||||
assert!(bits % 32 == 0);
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, (bits / 32) as u8);
|
||||
|
||||
b.push_op(OpAtom {
|
||||
dst: dst.clone().into(),
|
||||
|
|
@ -3753,15 +3753,15 @@ impl<'a> ShaderFromNir<'a> {
|
|||
|| self.nir.info.stage() == MESA_SHADER_KERNEL
|
||||
);
|
||||
|
||||
let bit_size = intrin.def.bit_size();
|
||||
let addr = self.get_src(&srcs[0]);
|
||||
let uaddr = self.get_src(&srcs[1]);
|
||||
let data = self.get_src(&srcs[2]);
|
||||
let atom_type = self.get_atomic_type(intrin);
|
||||
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
|
||||
|
||||
assert!(intrin.def.num_components() == 1);
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32));
|
||||
let bits = intrin.def.bit_size() * intrin.def.num_components();
|
||||
assert!(bits % 32 == 0);
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, (bits / 32) as u8);
|
||||
|
||||
b.push_op(OpAtom {
|
||||
dst: dst.clone().into(),
|
||||
|
|
@ -3786,14 +3786,14 @@ impl<'a> ShaderFromNir<'a> {
|
|||
);
|
||||
|
||||
assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
|
||||
let bit_size = intrin.def.bit_size();
|
||||
let addr = self.get_src(&srcs[0]);
|
||||
let cmpr = self.get_src(&srcs[1]);
|
||||
let data = self.get_src(&srcs[2]);
|
||||
let atom_type = AtomType::U(bit_size);
|
||||
let atom_type = self.get_atomic_type(intrin);
|
||||
|
||||
assert!(intrin.def.num_components() == 1);
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32));
|
||||
let bits = intrin.def.bit_size() * intrin.def.num_components();
|
||||
assert!(bits % 32 == 0);
|
||||
let dst = b.alloc_ssa_vec(RegFile::GPR, (bits / 32) as u8);
|
||||
|
||||
b.push_op(OpAtom {
|
||||
dst: dst.clone().into(),
|
||||
|
|
|
|||
|
|
@ -2623,11 +2623,11 @@ pub enum AtomType {
|
|||
}
|
||||
|
||||
impl AtomType {
|
||||
pub fn F(bits: u8) -> AtomType {
|
||||
match bits {
|
||||
16 => panic!("16-bit float atomics not yet supported"),
|
||||
32 => AtomType::F32,
|
||||
64 => AtomType::F64,
|
||||
pub fn F(bits: u8, comps: u8) -> AtomType {
|
||||
match (bits, comps) {
|
||||
(16, 2) => AtomType::F16v2,
|
||||
(32, 1) => AtomType::F32,
|
||||
(64, 1) => AtomType::F64,
|
||||
_ => panic!("Invalid float atomic type"),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1167,12 +1167,16 @@ type_size_vec4(const struct glsl_type *type, bool bindless)
|
|||
static bool
|
||||
atomic_supported(const nir_instr *instr, const void *data)
|
||||
{
|
||||
/* Shared atomics don't support 64-bit arithmetic */
|
||||
/* Shared atomics don't support and need lowering for:
|
||||
* - 64-bit arithmetic
|
||||
* - float32 adds
|
||||
* - f16vec2 */
|
||||
const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
nir_atomic_op atomic_op = nir_intrinsic_atomic_op(intr);
|
||||
return !(intr->intrinsic == nir_intrinsic_shared_atomic &&
|
||||
(intr->def.bit_size == 64 ||
|
||||
(intr->def.bit_size == 32 && atomic_op == nir_atomic_op_fadd)));
|
||||
(intr->def.bit_size == 32 && atomic_op == nir_atomic_op_fadd) ||
|
||||
(intr->def.bit_size == 16 && intr->def.num_components == 2)));
|
||||
}
|
||||
|
||||
static unsigned
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue