diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c index 2b5b45d8b93..d7741abe7c1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c @@ -1397,8 +1397,9 @@ static void visit_load_ssbo(struct lp_build_nir_context *bld_base, { LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32); LLVMValueRef offset = get_src(bld_base, instr->src[1]); + bool index_and_offset_are_uniform = nir_src_is_always_uniform(instr->src[0]) && nir_src_is_always_uniform(instr->src[1]); bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest), - idx, offset, result); + index_and_offset_are_uniform, idx, offset, result); } static void visit_store_ssbo(struct lp_build_nir_context *bld_base, @@ -1634,8 +1635,9 @@ static void visit_shared_load(struct lp_build_nir_context *bld_base, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) { LLVMValueRef offset = get_src(bld_base, instr->src[0]); + bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]); bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest), - NULL, offset, result); + offset_is_uniform, NULL, offset, result); } static void visit_shared_store(struct lp_build_nir_context *bld_base, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.h b/src/gallium/auxiliary/gallivm/lp_bld_nir.h index 895897e4bc8..a71181f9ddb 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.h @@ -111,6 +111,7 @@ struct lp_build_nir_context /* for SSBO and shared memory */ void (*load_mem)(struct lp_build_nir_context *bld_base, unsigned nc, unsigned bit_size, + bool index_and_offset_are_uniform, LLVMValueRef index, LLVMValueRef offset, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]); void (*store_mem)(struct lp_build_nir_context *bld_base, unsigned writemask, unsigned nc, unsigned bit_size, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c index b0ffe89250a..558245ca641 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c @@ -1143,6 +1143,7 @@ mem_access_base_pointer(struct lp_build_nir_context *bld_base, static void emit_load_mem(struct lp_build_nir_context *bld_base, unsigned nc, unsigned bit_size, + bool index_and_offset_are_uniform, LLVMValueRef index, LLVMValueRef offset, LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS]) @@ -1158,6 +1159,42 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base, offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), ""); + /* If the address is uniform, then use the address from invocation 0 to load, + * and broadcast to all invocations. + */ + if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) { + LLVMValueRef ssbo_limit; + LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, index, + lp_build_const_int32(gallivm, 0), &ssbo_limit); + + offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), ""); + + for (unsigned c = 0; c < nc; c++) { + LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), ""); + + LLVMValueRef scalar; + /* If loading outside the SSBO, we need to skip the load and read 0 instead. */ + if (ssbo_limit) { + LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size); + LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), ""); + LLVMBuildStore(builder, zero, res_store); + + LLVMValueRef fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntUGE, ssbo_limit, chan_offset, ""); + struct lp_build_if_state ifthen; + lp_build_if(&ifthen, gallivm, fetch_cond); + LLVMBuildStore(builder, lp_build_pointer_get(builder, mem_ptr, chan_offset), res_store); + lp_build_endif(&ifthen); + + scalar = LLVMBuildLoad(builder, res_store, ""); + } else { + scalar = lp_build_pointer_get(builder, mem_ptr, chan_offset); + } + + outval[c] = lp_build_broadcast_scalar(load_bld, scalar); + } + return; + } + /* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */ LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];