gallivm: Use unified atomics

This is a huge win because gallivm duplicated the translations in a zillion
places.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22914>
This commit is contained in:
Alyssa Rosenzweig 2023-05-08 19:38:16 -04:00 committed by Marge Bot
parent 3c823351c0
commit bd0a2b1608
3 changed files with 54 additions and 233 deletions

View file

@ -1603,10 +1603,10 @@ visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
LLVMValueRef val = get_src(bld_base, instr->src[2]);
LLVMValueRef val2 = NULL;
int bitsize = nir_src_bit_size(instr->src[2]);
if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap)
if (instr->intrinsic == nir_intrinsic_ssbo_atomic_swap)
val2 = get_src(bld_base, instr->src[3]);
bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, idx,
bld_base->atomic_mem(bld_base, nir_intrinsic_atomic_op(instr), bitsize, idx,
offset, val, val2, &result[0]);
}
@ -1681,6 +1681,27 @@ visit_store_image(struct lp_build_nir_context *bld_base,
bld_base->image_op(bld_base, &params);
}
/* Map a unified NIR atomic opcode to the equivalent LLVM atomicrmw binop.
 * Shared by the SSBO/shared/global/image atomic paths so the translation
 * lives in exactly one place (the point of this commit).
 */
LLVMAtomicRMWBinOp
lp_translate_atomic_op(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_iadd: return LLVMAtomicRMWBinOpAdd;
   case nir_atomic_op_xchg: return LLVMAtomicRMWBinOpXchg;
   case nir_atomic_op_iand: return LLVMAtomicRMWBinOpAnd;
   case nir_atomic_op_ior: return LLVMAtomicRMWBinOpOr;
   case nir_atomic_op_ixor: return LLVMAtomicRMWBinOpXor;
   case nir_atomic_op_umin: return LLVMAtomicRMWBinOpUMin;
   case nir_atomic_op_umax: return LLVMAtomicRMWBinOpUMax;
   case nir_atomic_op_imin: return LLVMAtomicRMWBinOpMin;   /* signed min */
   case nir_atomic_op_imax: return LLVMAtomicRMWBinOpMax;   /* signed max */
   case nir_atomic_op_fadd: return LLVMAtomicRMWBinOpFAdd;
/* FMin/FMax atomicrmw binops only exist in the LLVM-C API from LLVM 15 on;
 * on older LLVM these ops fall through to unreachable(). */
#if LLVM_VERSION_MAJOR >= 15
   case nir_atomic_op_fmin: return LLVMAtomicRMWBinOpFMin;
   case nir_atomic_op_fmax: return LLVMAtomicRMWBinOpFMax;
#endif
   /* comp_swap is handled separately via cmpxchg, never through this table */
   default: unreachable("Unexpected atomic");
   }
}
static void
visit_atomic_image(struct lp_build_nir_context *bld_base,
@ -1696,48 +1717,8 @@ visit_atomic_image(struct lp_build_nir_context *bld_base,
memset(&params, 0, sizeof(params));
switch (instr->intrinsic) {
case nir_intrinsic_image_atomic_add:
params.op = LLVMAtomicRMWBinOpAdd;
break;
case nir_intrinsic_image_atomic_exchange:
params.op = LLVMAtomicRMWBinOpXchg;
break;
case nir_intrinsic_image_atomic_and:
params.op = LLVMAtomicRMWBinOpAnd;
break;
case nir_intrinsic_image_atomic_or:
params.op = LLVMAtomicRMWBinOpOr;
break;
case nir_intrinsic_image_atomic_xor:
params.op = LLVMAtomicRMWBinOpXor;
break;
case nir_intrinsic_image_atomic_umin:
params.op = LLVMAtomicRMWBinOpUMin;
break;
case nir_intrinsic_image_atomic_umax:
params.op = LLVMAtomicRMWBinOpUMax;
break;
case nir_intrinsic_image_atomic_imin:
params.op = LLVMAtomicRMWBinOpMin;
break;
case nir_intrinsic_image_atomic_imax:
params.op = LLVMAtomicRMWBinOpMax;
break;
case nir_intrinsic_image_atomic_fadd:
params.op = LLVMAtomicRMWBinOpFAdd;
break;
#if LLVM_VERSION_MAJOR >= 15
case nir_intrinsic_image_atomic_fmin:
params.op = LLVMAtomicRMWBinOpFMin;
break;
case nir_intrinsic_image_atomic_fmax:
params.op = LLVMAtomicRMWBinOpFMax;
break;
#endif
default:
break;
}
if (instr->intrinsic != nir_intrinsic_image_atomic_swap)
params.op = lp_translate_atomic_op(nir_intrinsic_atomic_op(instr));
params.target = glsl_sampler_to_pipe(nir_intrinsic_image_dim(instr),
nir_intrinsic_image_array(instr));
@ -1752,7 +1733,7 @@ visit_atomic_image(struct lp_build_nir_context *bld_base,
if (nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_MS)
params.ms_index = get_src(bld_base, instr->src[2]);
if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
if (instr->intrinsic == nir_intrinsic_image_atomic_swap) {
LLVMValueRef cas_val = get_src(bld_base, instr->src[4]);
params.indata[0] = in_val;
params.indata2[0] = cas_val;
@ -1762,7 +1743,7 @@ visit_atomic_image(struct lp_build_nir_context *bld_base,
params.outdata = result;
params.img_op =
(instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
(instr->intrinsic == nir_intrinsic_image_atomic_swap)
? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
if (nir_src_is_const(instr->src[0]))
params.image_index = nir_src_as_int(instr->src[0]);
@ -1849,10 +1830,10 @@ visit_shared_atomic(struct lp_build_nir_context *bld_base,
LLVMValueRef val = get_src(bld_base, instr->src[1]);
LLVMValueRef val2 = NULL;
int bitsize = nir_src_bit_size(instr->src[1]);
if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)
if (instr->intrinsic == nir_intrinsic_shared_atomic_swap)
val2 = get_src(bld_base, instr->src[2]);
bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, NULL,
bld_base->atomic_mem(bld_base, nir_intrinsic_atomic_op(instr), bitsize, NULL,
offset, val, val2, &result[0]);
}
@ -1931,11 +1912,12 @@ visit_global_atomic(struct lp_build_nir_context *bld_base,
LLVMValueRef val2 = NULL;
int addr_bitsize = nir_src_bit_size(instr->src[0]);
int val_bitsize = nir_src_bit_size(instr->src[1]);
if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
if (instr->intrinsic == nir_intrinsic_global_atomic_swap)
val2 = get_src(bld_base, instr->src[2]);
bld_base->atomic_global(bld_base, instr->intrinsic, addr_bitsize,
val_bitsize, addr, val, val2, &result[0]);
bld_base->atomic_global(bld_base, nir_intrinsic_atomic_op(instr),
addr_bitsize, val_bitsize, addr, val, val2,
&result[0]);
}
#if LLVM_VERSION_MAJOR >= 10
@ -2086,19 +2068,8 @@ visit_intrinsic(struct lp_build_nir_context *bld_base,
case nir_intrinsic_end_primitive:
bld_base->end_primitive(bld_base, nir_intrinsic_stream_id(instr));
break;
case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_ssbo_atomic_imin:
case nir_intrinsic_ssbo_atomic_imax:
case nir_intrinsic_ssbo_atomic_umin:
case nir_intrinsic_ssbo_atomic_umax:
case nir_intrinsic_ssbo_atomic_and:
case nir_intrinsic_ssbo_atomic_or:
case nir_intrinsic_ssbo_atomic_xor:
case nir_intrinsic_ssbo_atomic_exchange:
case nir_intrinsic_ssbo_atomic_comp_swap:
case nir_intrinsic_ssbo_atomic_fadd:
case nir_intrinsic_ssbo_atomic_fmin:
case nir_intrinsic_ssbo_atomic_fmax:
case nir_intrinsic_ssbo_atomic:
case nir_intrinsic_ssbo_atomic_swap:
visit_ssbo_atomic(bld_base, instr, result);
break;
case nir_intrinsic_image_load:
@ -2107,19 +2078,8 @@ visit_intrinsic(struct lp_build_nir_context *bld_base,
case nir_intrinsic_image_store:
visit_store_image(bld_base, instr);
break;
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_imax:
case nir_intrinsic_image_atomic_umin:
case nir_intrinsic_image_atomic_umax:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_image_atomic_comp_swap:
case nir_intrinsic_image_atomic_fadd:
case nir_intrinsic_image_atomic_fmin:
case nir_intrinsic_image_atomic_fmax:
case nir_intrinsic_image_atomic:
case nir_intrinsic_image_atomic_swap:
visit_atomic_image(bld_base, instr, result);
break;
case nir_intrinsic_image_size:
@ -2134,19 +2094,8 @@ visit_intrinsic(struct lp_build_nir_context *bld_base,
case nir_intrinsic_store_shared:
visit_shared_store(bld_base, instr);
break;
case nir_intrinsic_shared_atomic_add:
case nir_intrinsic_shared_atomic_imin:
case nir_intrinsic_shared_atomic_umin:
case nir_intrinsic_shared_atomic_imax:
case nir_intrinsic_shared_atomic_umax:
case nir_intrinsic_shared_atomic_and:
case nir_intrinsic_shared_atomic_or:
case nir_intrinsic_shared_atomic_xor:
case nir_intrinsic_shared_atomic_exchange:
case nir_intrinsic_shared_atomic_comp_swap:
case nir_intrinsic_shared_atomic_fadd:
case nir_intrinsic_shared_atomic_fmin:
case nir_intrinsic_shared_atomic_fmax:
case nir_intrinsic_shared_atomic:
case nir_intrinsic_shared_atomic_swap:
visit_shared_atomic(bld_base, instr, result);
break;
case nir_intrinsic_scoped_barrier:
@ -2163,19 +2112,8 @@ visit_intrinsic(struct lp_build_nir_context *bld_base,
case nir_intrinsic_store_global:
visit_store_global(bld_base, instr);
break;
case nir_intrinsic_global_atomic_add:
case nir_intrinsic_global_atomic_imin:
case nir_intrinsic_global_atomic_umin:
case nir_intrinsic_global_atomic_imax:
case nir_intrinsic_global_atomic_umax:
case nir_intrinsic_global_atomic_and:
case nir_intrinsic_global_atomic_or:
case nir_intrinsic_global_atomic_xor:
case nir_intrinsic_global_atomic_exchange:
case nir_intrinsic_global_atomic_comp_swap:
case nir_intrinsic_global_atomic_fadd:
case nir_intrinsic_global_atomic_fmin:
case nir_intrinsic_global_atomic_fmax:
case nir_intrinsic_global_atomic:
case nir_intrinsic_global_atomic_swap:
visit_global_atomic(bld_base, instr, result);
break;
case nir_intrinsic_vote_all:
@ -2730,6 +2668,7 @@ bool lp_build_nir_llvm(struct lp_build_nir_context *bld_base,
nir_lower_locals_to_regs(nir);
nir_remove_dead_derefs(nir);
nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
nir_lower_legacy_atomics(nir);
if (is_aos(bld_base)) {
nir_move_vec_src_uses_to_dest(nir);

View file

@ -101,7 +101,7 @@ struct lp_build_nir_context
LLVMValueRef addr, LLVMValueRef dst);
void (*atomic_global)(struct lp_build_nir_context *bld_base,
nir_intrinsic_op op,
nir_atomic_op nir_op,
unsigned addr_bit_size,
unsigned val_bit_size,
LLVMValueRef addr,
@ -119,7 +119,7 @@ struct lp_build_nir_context
LLVMValueRef index, LLVMValueRef offset, LLVMValueRef dst);
void (*atomic_mem)(struct lp_build_nir_context *bld_base,
nir_intrinsic_op op,
nir_atomic_op op,
unsigned bit_size,
LLVMValueRef index, LLVMValueRef offset,
LLVMValueRef val, LLVMValueRef val2,
@ -359,5 +359,7 @@ get_int_bld(struct lp_build_nir_context *bld_base,
unsigned
lp_nir_aos_swizzle(struct lp_build_nir_context *bld_base, unsigned chan);
LLVMAtomicRMWBinOp
lp_translate_atomic_op(nir_atomic_op op);
#endif

View file

@ -991,27 +991,8 @@ static void emit_store_global(struct lp_build_nir_context *bld_base,
}
}
/* Return true for the float-typed atomic intrinsics (fadd/fmin/fmax) so
 * callers can pick a float build context instead of an integer one.
 * (Removed by this commit: superseded by nir_atomic_op_type(op) == nir_type_float.)
 */
static bool atomic_op_is_float(nir_intrinsic_op nir_op)
{
   switch (nir_op) {
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_shared_atomic_fmin:
   case nir_intrinsic_shared_atomic_fmax:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fmax:
      return true;
   default:
      break;
   }
   return false;
}
static void emit_atomic_global(struct lp_build_nir_context *bld_base,
nir_intrinsic_op nir_op,
nir_atomic_op nir_op,
unsigned addr_bit_size,
unsigned val_bit_size,
LLVMValueRef addr,
@ -1021,7 +1002,7 @@ static void emit_atomic_global(struct lp_build_nir_context *bld_base,
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
struct lp_build_context *uint_bld = &bld_base->uint_bld;
bool is_flt = atomic_op_is_float(nir_op);
bool is_flt = nir_atomic_op_type(nir_op) == nir_type_float;
struct lp_build_context *atom_bld = is_flt ? get_flt_bld(bld_base, val_bit_size) : get_int_bld(bld_base, true, val_bit_size);
if (is_flt)
val = LLVMBuildBitCast(builder, val, atom_bld->vec_type, "");
@ -1046,7 +1027,7 @@ static void emit_atomic_global(struct lp_build_nir_context *bld_base,
lp_build_if(&ifthen, gallivm, cond);
addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");
if (nir_op == nir_intrinsic_global_atomic_comp_swap) {
if (val2 != NULL /* compare-and-swap */) {
LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
loop_state.counter, "");
cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");
@ -1057,52 +1038,7 @@ static void emit_atomic_global(struct lp_build_nir_context *bld_base,
false);
scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
} else {
LLVMAtomicRMWBinOp op;
switch (nir_op) {
case nir_intrinsic_global_atomic_add:
op = LLVMAtomicRMWBinOpAdd;
break;
case nir_intrinsic_global_atomic_exchange:
op = LLVMAtomicRMWBinOpXchg;
break;
case nir_intrinsic_global_atomic_and:
op = LLVMAtomicRMWBinOpAnd;
break;
case nir_intrinsic_global_atomic_or:
op = LLVMAtomicRMWBinOpOr;
break;
case nir_intrinsic_global_atomic_xor:
op = LLVMAtomicRMWBinOpXor;
break;
case nir_intrinsic_global_atomic_umin:
op = LLVMAtomicRMWBinOpUMin;
break;
case nir_intrinsic_global_atomic_umax:
op = LLVMAtomicRMWBinOpUMax;
break;
case nir_intrinsic_global_atomic_imin:
op = LLVMAtomicRMWBinOpMin;
break;
case nir_intrinsic_global_atomic_imax:
op = LLVMAtomicRMWBinOpMax;
break;
case nir_intrinsic_global_atomic_fadd:
op = LLVMAtomicRMWBinOpFAdd;
break;
#if LLVM_VERSION_MAJOR >= 15
case nir_intrinsic_global_atomic_fmin:
op = LLVMAtomicRMWBinOpFMin;
break;
case nir_intrinsic_global_atomic_fmax:
op = LLVMAtomicRMWBinOpFMax;
break;
#endif
default:
unreachable("unknown atomic op");
}
scalar = LLVMBuildAtomicRMW(builder, op,
scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
addr_ptr, value_ptr,
LLVMAtomicOrderingSequentiallyConsistent,
false);
@ -1506,7 +1442,7 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base,
static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
nir_intrinsic_op nir_op,
nir_atomic_op nir_op,
uint32_t bit_size,
LLVMValueRef index, LLVMValueRef offset,
LLVMValueRef val, LLVMValueRef val2,
@ -1517,7 +1453,7 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
struct lp_build_context *uint_bld = &bld_base->uint_bld;
uint32_t shift_val = bit_size_to_shift_size(bit_size);
bool is_float = atomic_op_is_float(nir_op);
bool is_float = nir_atomic_op_type(nir_op) == nir_type_float;
struct lp_build_context *atomic_bld = is_float ? get_flt_bld(bld_base, bit_size) : get_int_bld(bld_base, true, bit_size);
offset = lp_build_shr_imm(uint_bld, offset, shift_val);
@ -1557,7 +1493,7 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
lp_build_if(&ifthen, gallivm, inner_cond);
if (nir_op == nir_intrinsic_ssbo_atomic_comp_swap || nir_op == nir_intrinsic_shared_atomic_comp_swap) {
if (val2 != NULL) {
LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
loop_state.counter, "");
cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
@ -1568,63 +1504,7 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
false);
scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
} else {
LLVMAtomicRMWBinOp op;
switch (nir_op) {
case nir_intrinsic_shared_atomic_add:
case nir_intrinsic_ssbo_atomic_add:
op = LLVMAtomicRMWBinOpAdd;
break;
case nir_intrinsic_shared_atomic_exchange:
case nir_intrinsic_ssbo_atomic_exchange:
op = LLVMAtomicRMWBinOpXchg;
break;
case nir_intrinsic_shared_atomic_and:
case nir_intrinsic_ssbo_atomic_and:
op = LLVMAtomicRMWBinOpAnd;
break;
case nir_intrinsic_shared_atomic_or:
case nir_intrinsic_ssbo_atomic_or:
op = LLVMAtomicRMWBinOpOr;
break;
case nir_intrinsic_shared_atomic_xor:
case nir_intrinsic_ssbo_atomic_xor:
op = LLVMAtomicRMWBinOpXor;
break;
case nir_intrinsic_shared_atomic_umin:
case nir_intrinsic_ssbo_atomic_umin:
op = LLVMAtomicRMWBinOpUMin;
break;
case nir_intrinsic_shared_atomic_umax:
case nir_intrinsic_ssbo_atomic_umax:
op = LLVMAtomicRMWBinOpUMax;
break;
case nir_intrinsic_ssbo_atomic_imin:
case nir_intrinsic_shared_atomic_imin:
op = LLVMAtomicRMWBinOpMin;
break;
case nir_intrinsic_ssbo_atomic_imax:
case nir_intrinsic_shared_atomic_imax:
op = LLVMAtomicRMWBinOpMax;
break;
case nir_intrinsic_shared_atomic_fadd:
case nir_intrinsic_ssbo_atomic_fadd:
op = LLVMAtomicRMWBinOpFAdd;
break;
#if LLVM_VERSION_MAJOR >= 15
case nir_intrinsic_shared_atomic_fmin:
case nir_intrinsic_ssbo_atomic_fmin:
op = LLVMAtomicRMWBinOpFMin;
break;
case nir_intrinsic_shared_atomic_fmax:
case nir_intrinsic_ssbo_atomic_fmax:
op = LLVMAtomicRMWBinOpFMax;
break;
#endif
default:
unreachable("unknown atomic op");
}
scalar = LLVMBuildAtomicRMW(builder, op,
scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
scalar_ptr, value_ptr,
LLVMAtomicOrderingSequentiallyConsistent,
false);