From 203920d4c69366b71db1d48d8fb8bd6439016191 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 16 Sep 2022 19:58:59 +1000 Subject: [PATCH] gallivm: add atomic 32-bit float support Reviewed-by: Roland Scheidegger Part-of: --- src/gallium/auxiliary/gallivm/lp_bld_nir.c | 23 +++++++ .../auxiliary/gallivm/lp_bld_nir_soa.c | 68 ++++++++++++++++--- .../auxiliary/gallivm/lp_bld_sample_soa.c | 12 ++-- 3 files changed, 90 insertions(+), 13 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c index b3a913ef59e..34fdb83996c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c @@ -1702,6 +1702,17 @@ visit_atomic_image(struct lp_build_nir_context *bld_base, case nir_intrinsic_image_atomic_imax: params.op = LLVMAtomicRMWBinOpMax; break; + case nir_intrinsic_image_atomic_fadd: + params.op = LLVMAtomicRMWBinOpFAdd; + break; +#if LLVM_VERSION_MAJOR >= 15 /* FMin/FMax atomicrmw ops are only available from LLVM 15 */ + case nir_intrinsic_image_atomic_fmin: + params.op = LLVMAtomicRMWBinOpFMin; + break; + case nir_intrinsic_image_atomic_fmax: + params.op = LLVMAtomicRMWBinOpFMax; + break; +#endif default: break; } @@ -2056,6 +2067,9 @@ visit_intrinsic(struct lp_build_nir_context *bld_base, case nir_intrinsic_ssbo_atomic_xor: case nir_intrinsic_ssbo_atomic_exchange: case nir_intrinsic_ssbo_atomic_comp_swap: + case nir_intrinsic_ssbo_atomic_fadd: + case nir_intrinsic_ssbo_atomic_fmin: + case nir_intrinsic_ssbo_atomic_fmax: visit_ssbo_atomic(bld_base, instr, result); break; case nir_intrinsic_image_load: @@ -2074,6 +2088,9 @@ visit_intrinsic(struct lp_build_nir_context *bld_base, case nir_intrinsic_image_atomic_xor: case nir_intrinsic_image_atomic_exchange: case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_atomic_fadd: + case nir_intrinsic_image_atomic_fmin: + case nir_intrinsic_image_atomic_fmax: visit_atomic_image(bld_base, instr, result); break; case nir_intrinsic_image_size: @@ -2098,6 +2115,9 @@ 
visit_intrinsic(struct lp_build_nir_context *bld_base, case nir_intrinsic_shared_atomic_xor: case nir_intrinsic_shared_atomic_exchange: case nir_intrinsic_shared_atomic_comp_swap: + case nir_intrinsic_shared_atomic_fadd: + case nir_intrinsic_shared_atomic_fmin: + case nir_intrinsic_shared_atomic_fmax: visit_shared_atomic(bld_base, instr, result); break; case nir_intrinsic_control_barrier: @@ -2131,6 +2151,9 @@ visit_intrinsic(struct lp_build_nir_context *bld_base, case nir_intrinsic_global_atomic_xor: case nir_intrinsic_global_atomic_exchange: case nir_intrinsic_global_atomic_comp_swap: + case nir_intrinsic_global_atomic_fadd: + case nir_intrinsic_global_atomic_fmin: + case nir_intrinsic_global_atomic_fmax: visit_global_atomic(bld_base, instr, result); break; case nir_intrinsic_vote_all: diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c index 90836928ade..b9334bcbd80 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c @@ -95,7 +95,7 @@ invocation_0_must_be_active(struct lp_build_nir_context *bld_base) } static LLVMValueRef -lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size) +lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size, bool is_float) { if (bit_size == 64) return LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0); @@ -104,7 +104,7 @@ lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size) else if (bit_size == 8) return LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0); else - return lp_build_const_int32(gallivm, 0); + return is_float ? 
lp_build_const_float(gallivm, 0) : lp_build_const_int32(gallivm, 0); } static LLVMValueRef @@ -925,6 +925,25 @@ static void emit_store_global(struct lp_build_nir_context *bld_base, } } +static bool atomic_op_is_float(nir_intrinsic_op nir_op) +{ + switch (nir_op) { + case nir_intrinsic_shared_atomic_fadd: + case nir_intrinsic_shared_atomic_fmin: + case nir_intrinsic_shared_atomic_fmax: + case nir_intrinsic_global_atomic_fadd: + case nir_intrinsic_global_atomic_fmin: + case nir_intrinsic_global_atomic_fmax: + case nir_intrinsic_ssbo_atomic_fadd: + case nir_intrinsic_ssbo_atomic_fmin: + case nir_intrinsic_ssbo_atomic_fmax: + return true; + default: + break; + } + return false; +} + static void emit_atomic_global(struct lp_build_nir_context *bld_base, nir_intrinsic_op nir_op, unsigned addr_bit_size, @@ -936,7 +955,11 @@ static void emit_atomic_global(struct lp_build_nir_context *bld_base, struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *uint_bld = &bld_base->uint_bld; - struct lp_build_context *atom_bld = get_int_bld(bld_base, true, val_bit_size); + bool is_flt = atomic_op_is_float(nir_op); + struct lp_build_context *atom_bld = is_flt ? 
get_flt_bld(bld_base, val_bit_size) : get_int_bld(bld_base, true, val_bit_size); + if (is_flt) + val = LLVMBuildBitCast(builder, val, atom_bld->vec_type, ""); + LLVMValueRef atom_res = lp_build_alloca(gallivm, LLVMTypeOf(val), ""); LLVMValueRef exec_mask = mask_vec(bld_base); @@ -998,6 +1021,17 @@ static void emit_atomic_global(struct lp_build_nir_context *bld_base, case nir_intrinsic_global_atomic_imax: op = LLVMAtomicRMWBinOpMax; break; + case nir_intrinsic_global_atomic_fadd: + op = LLVMAtomicRMWBinOpFAdd; + break; +#if LLVM_VERSION_MAJOR >= 15 /* FMin/FMax atomicrmw ops are only available from LLVM 15 */ + case nir_intrinsic_global_atomic_fmin: + op = LLVMAtomicRMWBinOpFMin; + break; + case nir_intrinsic_global_atomic_fmax: + op = LLVMAtomicRMWBinOpFMax; + break; +#endif default: unreachable("unknown atomic op"); } @@ -1093,7 +1127,7 @@ static void emit_load_ubo(struct lp_build_nir_context *bld_base, LLVMValueRef scalar; /* If loading outside the UBO, we need to skip the load and read 0 instead. */ - LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size); + LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false); LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), ""); LLVMBuildStore(builder, zero, res_store); @@ -1225,7 +1259,7 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base, LLVMValueRef scalar; /* If loading outside the SSBO, we need to skip the load and read 0 instead. 
*/ if (ssbo_limit) { - LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size); + LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false); LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), ""); LLVMBuildStore(builder, zero, res_store); @@ -1285,7 +1319,7 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base, LLVMBuildStore(builder, temp_res, result[c]); lp_build_else(&ifthen); temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], ""); - LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size); + LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false); temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, ""); LLVMBuildStore(builder, temp_res, result[c]); lp_build_endif(&ifthen); @@ -1397,6 +1431,7 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base, } + static void emit_atomic_mem(struct lp_build_nir_context *bld_base, nir_intrinsic_op nir_op, uint32_t bit_size, @@ -1409,7 +1444,8 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base, LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; struct lp_build_context *uint_bld = &bld_base->uint_bld; uint32_t shift_val = bit_size_to_shift_size(bit_size); - struct lp_build_context *atomic_bld = get_int_bld(bld_base, true, bit_size); + bool is_float = atomic_op_is_float(nir_op); + struct lp_build_context *atomic_bld = is_float ? 
get_flt_bld(bld_base, bit_size) : get_int_bld(bld_base, true, bit_size); offset = lp_build_shr_imm(uint_bld, offset, shift_val); LLVMValueRef atom_res = lp_build_alloca(gallivm, @@ -1498,6 +1534,20 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base, case nir_intrinsic_shared_atomic_imax: op = LLVMAtomicRMWBinOpMax; break; + case nir_intrinsic_shared_atomic_fadd: + case nir_intrinsic_ssbo_atomic_fadd: + op = LLVMAtomicRMWBinOpFAdd; + break; +#if LLVM_VERSION_MAJOR >= 15 + case nir_intrinsic_shared_atomic_fmin: + case nir_intrinsic_ssbo_atomic_fmin: + op = LLVMAtomicRMWBinOpFMin; + break; + case nir_intrinsic_shared_atomic_fmax: + case nir_intrinsic_ssbo_atomic_fmax: + op = LLVMAtomicRMWBinOpFMax; + break; +#endif default: unreachable("unknown atomic op"); } @@ -1511,7 +1561,7 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base, LLVMBuildStore(builder, temp_res, atom_res); lp_build_else(&ifthen); temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, ""); - LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size); + LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, is_float); temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, ""); LLVMBuildStore(builder, temp_res, atom_res); lp_build_endif(&ifthen); @@ -2581,7 +2631,7 @@ emit_load_scratch(struct lp_build_nir_context *bld_base, LLVMBuildStore(builder, temp_res, result); lp_build_else(&ifthen); temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result, ""); - LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size); + LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false); temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, ""); LLVMBuildStore(builder, temp_res, result); lp_build_endif(&ifthen); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index ba5b83cd5af..37656d95c01 100644 --- 
a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -4571,8 +4571,12 @@ lp_build_do_atomic_soa(struct gallivm_state *gallivm, return; } + LLVMTypeRef ref_type = (format == PIPE_FORMAT_R32_FLOAT) ? + LLVMFloatTypeInContext(gallivm->context) : + LLVMInt32TypeInContext(gallivm->context); + LLVMTypeRef atom_res_elem_type = - LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), type.length); + LLVMVectorType(ref_type, type.length); LLVMValueRef atom_res = lp_build_alloca(gallivm, atom_res_elem_type, ""); offset = LLVMBuildGEP(gallivm->builder, base_ptr, &offset, 1, ""); @@ -4600,9 +4604,9 @@ lp_build_do_atomic_soa(struct gallivm_state *gallivm, LLVMValueRef cast_base_ptr = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, ""); cast_base_ptr = LLVMBuildBitCast(gallivm->builder, cast_base_ptr, - LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), ""); + LLVMPointerType(ref_type, 0), ""); data = LLVMBuildBitCast(gallivm->builder, data, - LLVMInt32TypeInContext(gallivm->context), ""); + ref_type, ""); if (img_op == LP_IMG_ATOMIC_CAS) { LLVMValueRef cas_src_ptr = @@ -4610,7 +4614,7 @@ lp_build_do_atomic_soa(struct gallivm_state *gallivm, loop_state.counter, ""); LLVMValueRef cas_src = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, - LLVMInt32TypeInContext(gallivm->context), ""); + ref_type, ""); data = LLVMBuildAtomicCmpXchg(gallivm->builder, cast_base_ptr, data, cas_src, LLVMAtomicOrderingSequentiallyConsistent,