mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 09:18:04 +02:00
gallivm: add atomic 32-bit float support
Reviewed-by: Roland Scheidegger <sroland@vmware.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18636>
This commit is contained in:
parent
96a3136ddd
commit
203920d4c6
3 changed files with 90 additions and 13 deletions
|
|
@ -1702,6 +1702,17 @@ visit_atomic_image(struct lp_build_nir_context *bld_base,
|
||||||
case nir_intrinsic_image_atomic_imax:
|
case nir_intrinsic_image_atomic_imax:
|
||||||
params.op = LLVMAtomicRMWBinOpMax;
|
params.op = LLVMAtomicRMWBinOpMax;
|
||||||
break;
|
break;
|
||||||
|
/* 32-bit float image atomics: select the floating-point RMW operation
 * that matches the NIR intrinsic. */
case nir_intrinsic_image_atomic_fadd:
|
||||||
|
params.op = LLVMAtomicRMWBinOpFAdd;
|
||||||
|
break;
|
||||||
|
/* The FMin/FMax RMW ops are only referenced when building against
 * LLVM 15 or newer; use the same LLVM_VERSION_MAJOR guard as the
 * global and ssbo/shared atomic paths. */
#if LLVM_VERSION_MAJOR >= 15
|
||||||
|
case nir_intrinsic_image_atomic_fmin:
|
||||||
|
/* Float minimum, not the signed-integer LLVMAtomicRMWBinOpMin. */
params.op = LLVMAtomicRMWBinOpFMin;
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_image_atomic_fmax:
|
||||||
|
/* Float maximum, not the signed-integer LLVMAtomicRMWBinOpMax. */
params.op = LLVMAtomicRMWBinOpFMax;
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -2056,6 +2067,9 @@ visit_intrinsic(struct lp_build_nir_context *bld_base,
|
||||||
case nir_intrinsic_ssbo_atomic_xor:
|
case nir_intrinsic_ssbo_atomic_xor:
|
||||||
case nir_intrinsic_ssbo_atomic_exchange:
|
case nir_intrinsic_ssbo_atomic_exchange:
|
||||||
case nir_intrinsic_ssbo_atomic_comp_swap:
|
case nir_intrinsic_ssbo_atomic_comp_swap:
|
||||||
|
case nir_intrinsic_ssbo_atomic_fadd:
|
||||||
|
case nir_intrinsic_ssbo_atomic_fmin:
|
||||||
|
case nir_intrinsic_ssbo_atomic_fmax:
|
||||||
visit_ssbo_atomic(bld_base, instr, result);
|
visit_ssbo_atomic(bld_base, instr, result);
|
||||||
break;
|
break;
|
||||||
case nir_intrinsic_image_load:
|
case nir_intrinsic_image_load:
|
||||||
|
|
@ -2074,6 +2088,9 @@ visit_intrinsic(struct lp_build_nir_context *bld_base,
|
||||||
case nir_intrinsic_image_atomic_xor:
|
case nir_intrinsic_image_atomic_xor:
|
||||||
case nir_intrinsic_image_atomic_exchange:
|
case nir_intrinsic_image_atomic_exchange:
|
||||||
case nir_intrinsic_image_atomic_comp_swap:
|
case nir_intrinsic_image_atomic_comp_swap:
|
||||||
|
case nir_intrinsic_image_atomic_fadd:
|
||||||
|
case nir_intrinsic_image_atomic_fmin:
|
||||||
|
case nir_intrinsic_image_atomic_fmax:
|
||||||
visit_atomic_image(bld_base, instr, result);
|
visit_atomic_image(bld_base, instr, result);
|
||||||
break;
|
break;
|
||||||
case nir_intrinsic_image_size:
|
case nir_intrinsic_image_size:
|
||||||
|
|
@ -2098,6 +2115,9 @@ visit_intrinsic(struct lp_build_nir_context *bld_base,
|
||||||
case nir_intrinsic_shared_atomic_xor:
|
case nir_intrinsic_shared_atomic_xor:
|
||||||
case nir_intrinsic_shared_atomic_exchange:
|
case nir_intrinsic_shared_atomic_exchange:
|
||||||
case nir_intrinsic_shared_atomic_comp_swap:
|
case nir_intrinsic_shared_atomic_comp_swap:
|
||||||
|
case nir_intrinsic_shared_atomic_fadd:
|
||||||
|
case nir_intrinsic_shared_atomic_fmin:
|
||||||
|
case nir_intrinsic_shared_atomic_fmax:
|
||||||
visit_shared_atomic(bld_base, instr, result);
|
visit_shared_atomic(bld_base, instr, result);
|
||||||
break;
|
break;
|
||||||
case nir_intrinsic_control_barrier:
|
case nir_intrinsic_control_barrier:
|
||||||
|
|
@ -2131,6 +2151,9 @@ visit_intrinsic(struct lp_build_nir_context *bld_base,
|
||||||
case nir_intrinsic_global_atomic_xor:
|
case nir_intrinsic_global_atomic_xor:
|
||||||
case nir_intrinsic_global_atomic_exchange:
|
case nir_intrinsic_global_atomic_exchange:
|
||||||
case nir_intrinsic_global_atomic_comp_swap:
|
case nir_intrinsic_global_atomic_comp_swap:
|
||||||
|
case nir_intrinsic_global_atomic_fadd:
|
||||||
|
case nir_intrinsic_global_atomic_fmin:
|
||||||
|
case nir_intrinsic_global_atomic_fmax:
|
||||||
visit_global_atomic(bld_base, instr, result);
|
visit_global_atomic(bld_base, instr, result);
|
||||||
break;
|
break;
|
||||||
case nir_intrinsic_vote_all:
|
case nir_intrinsic_vote_all:
|
||||||
|
|
|
||||||
|
|
@ -95,7 +95,7 @@ invocation_0_must_be_active(struct lp_build_nir_context *bld_base)
|
||||||
}
|
}
|
||||||
|
|
||||||
static LLVMValueRef
|
static LLVMValueRef
|
||||||
lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size)
|
lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size, bool is_float)
|
||||||
{
|
{
|
||||||
if (bit_size == 64)
|
if (bit_size == 64)
|
||||||
return LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
|
return LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
|
||||||
|
|
@ -104,7 +104,7 @@ lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size)
|
||||||
else if (bit_size == 8)
|
else if (bit_size == 8)
|
||||||
return LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
|
return LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
|
||||||
else
|
else
|
||||||
return lp_build_const_int32(gallivm, 0);
|
return is_float ? lp_build_const_float(gallivm, 0) : lp_build_const_int32(gallivm, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static LLVMValueRef
|
static LLVMValueRef
|
||||||
|
|
@ -925,6 +925,25 @@ static void emit_store_global(struct lp_build_nir_context *bld_base,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tell whether a NIR atomic intrinsic operates on floating-point data.
 *
 * Returns true for the fadd/fmin/fmax variants of the shared, global and
 * ssbo atomic intrinsics, and false for every other opcode. Image atomic
 * intrinsics are not listed here.
 */
static bool atomic_op_is_float(nir_intrinsic_op nir_op)
|
||||||
|
{
|
||||||
|
switch (nir_op) {
|
||||||
|
case nir_intrinsic_shared_atomic_fadd:
|
||||||
|
case nir_intrinsic_shared_atomic_fmin:
|
||||||
|
case nir_intrinsic_shared_atomic_fmax:
|
||||||
|
case nir_intrinsic_global_atomic_fadd:
|
||||||
|
case nir_intrinsic_global_atomic_fmin:
|
||||||
|
case nir_intrinsic_global_atomic_fmax:
|
||||||
|
case nir_intrinsic_ssbo_atomic_fadd:
|
||||||
|
case nir_intrinsic_ssbo_atomic_fmin:
|
||||||
|
case nir_intrinsic_ssbo_atomic_fmax:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static void emit_atomic_global(struct lp_build_nir_context *bld_base,
|
static void emit_atomic_global(struct lp_build_nir_context *bld_base,
|
||||||
nir_intrinsic_op nir_op,
|
nir_intrinsic_op nir_op,
|
||||||
unsigned addr_bit_size,
|
unsigned addr_bit_size,
|
||||||
|
|
@ -936,7 +955,11 @@ static void emit_atomic_global(struct lp_build_nir_context *bld_base,
|
||||||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||||
LLVMBuilderRef builder = gallivm->builder;
|
LLVMBuilderRef builder = gallivm->builder;
|
||||||
struct lp_build_context *uint_bld = &bld_base->uint_bld;
|
struct lp_build_context *uint_bld = &bld_base->uint_bld;
|
||||||
struct lp_build_context *atom_bld = get_int_bld(bld_base, true, val_bit_size);
|
bool is_flt = atomic_op_is_float(nir_op);
|
||||||
|
struct lp_build_context *atom_bld = is_flt ? get_flt_bld(bld_base, val_bit_size) : get_int_bld(bld_base, true, val_bit_size);
|
||||||
|
if (is_flt)
|
||||||
|
val = LLVMBuildBitCast(builder, val, atom_bld->vec_type, "");
|
||||||
|
|
||||||
LLVMValueRef atom_res = lp_build_alloca(gallivm,
|
LLVMValueRef atom_res = lp_build_alloca(gallivm,
|
||||||
LLVMTypeOf(val), "");
|
LLVMTypeOf(val), "");
|
||||||
LLVMValueRef exec_mask = mask_vec(bld_base);
|
LLVMValueRef exec_mask = mask_vec(bld_base);
|
||||||
|
|
@ -998,6 +1021,17 @@ static void emit_atomic_global(struct lp_build_nir_context *bld_base,
|
||||||
case nir_intrinsic_global_atomic_imax:
|
case nir_intrinsic_global_atomic_imax:
|
||||||
op = LLVMAtomicRMWBinOpMax;
|
op = LLVMAtomicRMWBinOpMax;
|
||||||
break;
|
break;
|
||||||
|
/* 32-bit float global atomics: select the floating-point RMW operation
 * that matches the NIR intrinsic. */
case nir_intrinsic_global_atomic_fadd:
|
||||||
|
op = LLVMAtomicRMWBinOpFAdd;
|
||||||
|
break;
|
||||||
|
/* The FMin/FMax RMW ops are only referenced when building against
 * LLVM 15 or newer. */
#if LLVM_VERSION_MAJOR >= 15
|
||||||
|
case nir_intrinsic_global_atomic_fmin:
|
||||||
|
op = LLVMAtomicRMWBinOpFMin;
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_global_atomic_fmax:
|
||||||
|
/* Enumerator is spelled FMax (capital M), matching FMin above. */
op = LLVMAtomicRMWBinOpFMax;
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
default:
|
default:
|
||||||
unreachable("unknown atomic op");
|
unreachable("unknown atomic op");
|
||||||
}
|
}
|
||||||
|
|
@ -1093,7 +1127,7 @@ static void emit_load_ubo(struct lp_build_nir_context *bld_base,
|
||||||
|
|
||||||
LLVMValueRef scalar;
|
LLVMValueRef scalar;
|
||||||
/* If loading outside the UBO, we need to skip the load and read 0 instead. */
|
/* If loading outside the UBO, we need to skip the load and read 0 instead. */
|
||||||
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
|
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
|
||||||
LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
|
LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
|
||||||
LLVMBuildStore(builder, zero, res_store);
|
LLVMBuildStore(builder, zero, res_store);
|
||||||
|
|
||||||
|
|
@ -1225,7 +1259,7 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base,
|
||||||
LLVMValueRef scalar;
|
LLVMValueRef scalar;
|
||||||
/* If loading outside the SSBO, we need to skip the load and read 0 instead. */
|
/* If loading outside the SSBO, we need to skip the load and read 0 instead. */
|
||||||
if (ssbo_limit) {
|
if (ssbo_limit) {
|
||||||
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
|
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
|
||||||
LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
|
LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
|
||||||
LLVMBuildStore(builder, zero, res_store);
|
LLVMBuildStore(builder, zero, res_store);
|
||||||
|
|
||||||
|
|
@ -1285,7 +1319,7 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base,
|
||||||
LLVMBuildStore(builder, temp_res, result[c]);
|
LLVMBuildStore(builder, temp_res, result[c]);
|
||||||
lp_build_else(&ifthen);
|
lp_build_else(&ifthen);
|
||||||
temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
|
temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
|
||||||
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
|
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
|
||||||
temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
|
temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
|
||||||
LLVMBuildStore(builder, temp_res, result[c]);
|
LLVMBuildStore(builder, temp_res, result[c]);
|
||||||
lp_build_endif(&ifthen);
|
lp_build_endif(&ifthen);
|
||||||
|
|
@ -1397,6 +1431,7 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
|
static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
|
||||||
nir_intrinsic_op nir_op,
|
nir_intrinsic_op nir_op,
|
||||||
uint32_t bit_size,
|
uint32_t bit_size,
|
||||||
|
|
@ -1409,7 +1444,8 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
|
||||||
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
|
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
|
||||||
struct lp_build_context *uint_bld = &bld_base->uint_bld;
|
struct lp_build_context *uint_bld = &bld_base->uint_bld;
|
||||||
uint32_t shift_val = bit_size_to_shift_size(bit_size);
|
uint32_t shift_val = bit_size_to_shift_size(bit_size);
|
||||||
struct lp_build_context *atomic_bld = get_int_bld(bld_base, true, bit_size);
|
bool is_float = atomic_op_is_float(nir_op);
|
||||||
|
struct lp_build_context *atomic_bld = is_float ? get_flt_bld(bld_base, bit_size) : get_int_bld(bld_base, true, bit_size);
|
||||||
|
|
||||||
offset = lp_build_shr_imm(uint_bld, offset, shift_val);
|
offset = lp_build_shr_imm(uint_bld, offset, shift_val);
|
||||||
LLVMValueRef atom_res = lp_build_alloca(gallivm,
|
LLVMValueRef atom_res = lp_build_alloca(gallivm,
|
||||||
|
|
@ -1498,6 +1534,20 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
|
||||||
case nir_intrinsic_shared_atomic_imax:
|
case nir_intrinsic_shared_atomic_imax:
|
||||||
op = LLVMAtomicRMWBinOpMax;
|
op = LLVMAtomicRMWBinOpMax;
|
||||||
break;
|
break;
|
||||||
|
case nir_intrinsic_shared_atomic_fadd:
|
||||||
|
case nir_intrinsic_ssbo_atomic_fadd:
|
||||||
|
op = LLVMAtomicRMWBinOpFAdd;
|
||||||
|
break;
|
||||||
|
#if LLVM_VERSION_MAJOR >= 15
|
||||||
|
case nir_intrinsic_shared_atomic_fmin:
|
||||||
|
case nir_intrinsic_ssbo_atomic_fmin:
|
||||||
|
op = LLVMAtomicRMWBinOpFMin;
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_shared_atomic_fmax:
|
||||||
|
case nir_intrinsic_ssbo_atomic_fmax:
|
||||||
|
op = LLVMAtomicRMWBinOpFMax;
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
default:
|
default:
|
||||||
unreachable("unknown atomic op");
|
unreachable("unknown atomic op");
|
||||||
}
|
}
|
||||||
|
|
@ -1511,7 +1561,7 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
|
||||||
LLVMBuildStore(builder, temp_res, atom_res);
|
LLVMBuildStore(builder, temp_res, atom_res);
|
||||||
lp_build_else(&ifthen);
|
lp_build_else(&ifthen);
|
||||||
temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
|
temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
|
||||||
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
|
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, is_float);
|
||||||
temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
|
temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
|
||||||
LLVMBuildStore(builder, temp_res, atom_res);
|
LLVMBuildStore(builder, temp_res, atom_res);
|
||||||
lp_build_endif(&ifthen);
|
lp_build_endif(&ifthen);
|
||||||
|
|
@ -2581,7 +2631,7 @@ emit_load_scratch(struct lp_build_nir_context *bld_base,
|
||||||
LLVMBuildStore(builder, temp_res, result);
|
LLVMBuildStore(builder, temp_res, result);
|
||||||
lp_build_else(&ifthen);
|
lp_build_else(&ifthen);
|
||||||
temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result, "");
|
temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result, "");
|
||||||
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
|
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
|
||||||
temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
|
temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
|
||||||
LLVMBuildStore(builder, temp_res, result);
|
LLVMBuildStore(builder, temp_res, result);
|
||||||
lp_build_endif(&ifthen);
|
lp_build_endif(&ifthen);
|
||||||
|
|
|
||||||
|
|
@ -4571,8 +4571,12 @@ lp_build_do_atomic_soa(struct gallivm_state *gallivm,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LLVMTypeRef ref_type = (format == PIPE_FORMAT_R32_FLOAT) ?
|
||||||
|
LLVMFloatTypeInContext(gallivm->context) :
|
||||||
|
LLVMInt32TypeInContext(gallivm->context);
|
||||||
|
|
||||||
LLVMTypeRef atom_res_elem_type =
|
LLVMTypeRef atom_res_elem_type =
|
||||||
LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), type.length);
|
LLVMVectorType(ref_type, type.length);
|
||||||
LLVMValueRef atom_res = lp_build_alloca(gallivm, atom_res_elem_type, "");
|
LLVMValueRef atom_res = lp_build_alloca(gallivm, atom_res_elem_type, "");
|
||||||
|
|
||||||
offset = LLVMBuildGEP(gallivm->builder, base_ptr, &offset, 1, "");
|
offset = LLVMBuildGEP(gallivm->builder, base_ptr, &offset, 1, "");
|
||||||
|
|
@ -4600,9 +4604,9 @@ lp_build_do_atomic_soa(struct gallivm_state *gallivm,
|
||||||
LLVMValueRef cast_base_ptr =
|
LLVMValueRef cast_base_ptr =
|
||||||
LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
|
LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
|
||||||
cast_base_ptr = LLVMBuildBitCast(gallivm->builder, cast_base_ptr,
|
cast_base_ptr = LLVMBuildBitCast(gallivm->builder, cast_base_ptr,
|
||||||
LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
|
LLVMPointerType(ref_type, 0), "");
|
||||||
data = LLVMBuildBitCast(gallivm->builder, data,
|
data = LLVMBuildBitCast(gallivm->builder, data,
|
||||||
LLVMInt32TypeInContext(gallivm->context), "");
|
ref_type, "");
|
||||||
|
|
||||||
if (img_op == LP_IMG_ATOMIC_CAS) {
|
if (img_op == LP_IMG_ATOMIC_CAS) {
|
||||||
LLVMValueRef cas_src_ptr =
|
LLVMValueRef cas_src_ptr =
|
||||||
|
|
@ -4610,7 +4614,7 @@ lp_build_do_atomic_soa(struct gallivm_state *gallivm,
|
||||||
loop_state.counter, "");
|
loop_state.counter, "");
|
||||||
LLVMValueRef cas_src =
|
LLVMValueRef cas_src =
|
||||||
LLVMBuildBitCast(gallivm->builder, cas_src_ptr,
|
LLVMBuildBitCast(gallivm->builder, cas_src_ptr,
|
||||||
LLVMInt32TypeInContext(gallivm->context), "");
|
ref_type, "");
|
||||||
data = LLVMBuildAtomicCmpXchg(gallivm->builder, cast_base_ptr, data,
|
data = LLVMBuildAtomicCmpXchg(gallivm->builder, cast_base_ptr, data,
|
||||||
cas_src,
|
cas_src,
|
||||||
LLVMAtomicOrderingSequentiallyConsistent,
|
LLVMAtomicOrderingSequentiallyConsistent,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue