nir/opt_uniform_atomics: Handle unified atomics

This is the one place where using nir_atomic_op instead of nir_op directly is a
little annoying, since we need to translate between the two enums, but it's not
a big deal.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22914>
This commit is contained in:
Alyssa Rosenzweig 2023-05-12 10:55:28 -04:00 committed by Marge Bot
parent b1fd8d27fc
commit 55f7fd6d0d

View file

@ -40,10 +40,39 @@
#include "nir/nir_builder.h"
static nir_op
parse_atomic_op(nir_intrinsic_op op, unsigned *offset_src, unsigned *data_src,
unsigned *offset2_src)
atomic_op_to_alu(nir_atomic_op op)
{
switch (op) {
case nir_atomic_op_iadd: return nir_op_iadd;
case nir_atomic_op_imin: return nir_op_imin;
case nir_atomic_op_umin: return nir_op_umin;
case nir_atomic_op_imax: return nir_op_imax;
case nir_atomic_op_umax: return nir_op_umax;
case nir_atomic_op_iand: return nir_op_iand;
case nir_atomic_op_ior: return nir_op_ior;
case nir_atomic_op_ixor: return nir_op_ixor;
case nir_atomic_op_fadd: return nir_op_fadd;
case nir_atomic_op_fmin: return nir_op_fmin;
case nir_atomic_op_fmax: return nir_op_fmax;
/* We don't handle exchanges or wraps */
case nir_atomic_op_xchg:
case nir_atomic_op_cmpxchg:
case nir_atomic_op_fcmpxchg:
case nir_atomic_op_inc_wrap:
case nir_atomic_op_dec_wrap:
return nir_num_opcodes;
}
unreachable("Unknown atomic op");
}
static nir_op
parse_atomic_op(nir_intrinsic_instr *intr, unsigned *offset_src,
unsigned *data_src, unsigned *offset2_src)
{
switch (intr->intrinsic) {
/* Legacy atomics */
#define OP_NOIMG(intrin, alu) \
case nir_intrinsic_ssbo_atomic_##intrin: \
*offset_src = 1; \
@ -84,6 +113,33 @@ parse_atomic_op(nir_intrinsic_op op, unsigned *offset_src, unsigned *data_src,
OP_NOIMG(fmax, fmax)
#undef OP_NOIMG
#undef OP
/* Unified atomics */
case nir_intrinsic_ssbo_atomic:
*offset_src = 1;
*data_src = 2;
*offset2_src = *offset_src;
return atomic_op_to_alu(nir_intrinsic_atomic_op(intr));
case nir_intrinsic_shared_atomic:
case nir_intrinsic_global_atomic:
case nir_intrinsic_deref_atomic:
*offset_src = 0;
*data_src = 1;
*offset2_src = *offset_src;
return atomic_op_to_alu(nir_intrinsic_atomic_op(intr));
case nir_intrinsic_global_atomic_amd:
*offset_src = 0;
*data_src = 1;
*offset2_src = 2;
return atomic_op_to_alu(nir_intrinsic_atomic_op(intr));
case nir_intrinsic_image_deref_atomic:
case nir_intrinsic_image_atomic:
case nir_intrinsic_bindless_image_atomic:
*offset_src = 1;
*data_src = 3;
*offset2_src = *offset_src;
return atomic_op_to_alu(nir_intrinsic_atomic_op(intr));
default:
return nir_num_opcodes;
}
@ -211,7 +267,7 @@ optimize_atomic(nir_builder *b, nir_intrinsic_instr *intrin, bool return_prev)
unsigned offset_src = 0;
unsigned data_src = 0;
unsigned offset2_src = 0;
nir_op op = parse_atomic_op(intrin->intrinsic, &offset_src, &data_src, &offset2_src);
nir_op op = parse_atomic_op(intrin, &offset_src, &data_src, &offset2_src);
nir_ssa_def *data = intrin->src[data_src].ssa;
/* Separate uniform reduction and scan is faster than doing a combined scan+reduce */
@ -295,7 +351,7 @@ opt_uniform_atomics(nir_function_impl *impl)
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
unsigned offset_src, data_src, offset2_src;
if (parse_atomic_op(intrin->intrinsic, &offset_src, &data_src, &offset2_src) ==
if (parse_atomic_op(intrin, &offset_src, &data_src, &offset2_src) ==
nir_num_opcodes)
continue;