gallivm/nir: pass a "uniform address" flag to load_global

visit_load_global() now queries nir_src_is_always_uniform() on the address
source and forwards the result through the load_global callback.  In the SoA
implementation, when that flag is set and invocation_0_must_be_active()
holds, the address of invocation 0 is extracted, each component is loaded
once through it, and the scalar is broadcast to every invocation; otherwise
the existing per-component loop is used.

The callback prototype names the new parameter offset_is_uniform so that it
matches the caller and the implementation.

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
index 72243a18cd2..2b5b45d8b93 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
@@ -1695,9 +1695,10 @@ static void visit_load_global(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
    LLVMValueRef addr = get_src(bld_base, instr->src[0]);
+   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
    bld_base->load_global(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
                          nir_src_bit_size(instr->src[0]),
-                         addr, result);
+                         offset_is_uniform, addr, result);
 }
 
 static void visit_store_global(struct lp_build_nir_context *bld_base,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.h b/src/gallium/auxiliary/gallivm/lp_bld_nir.h
index b68bf073111..895897e4bc8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.h
@@ -91,6 +91,7 @@ struct lp_build_nir_context
    void (*load_global)(struct lp_build_nir_context *bld_base,
                        unsigned nc, unsigned bit_size,
                        unsigned offset_bit_size,
+                       bool offset_is_uniform,
                        LLVMValueRef offset, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
 
    void (*store_global)(struct lp_build_nir_context *bld_base,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
index de9565a9072..b0ffe89250a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
@@ -816,6 +816,7 @@ static void emit_load_global(struct lp_build_nir_context *bld_base,
                              unsigned nc,
                              unsigned bit_size,
                              unsigned addr_bit_size,
+                             bool offset_is_uniform,
                              LLVMValueRef addr,
                              LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
 {
@@ -827,6 +828,21 @@ static void emit_load_global(struct lp_build_nir_context *bld_base,
 
    res_bld = get_int_bld(bld_base, true, bit_size);
 
+   if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
+      /* If the offset is uniform, then use the address from invocation 0 to
+       * load, and broadcast to all invocations.
+       */
+      LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
+                                                      lp_build_const_int32(gallivm, 0), "");
+      addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
+
+      for (unsigned c = 0; c < nc; c++) {
+         LLVMValueRef scalar = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));
+         outval[c] = lp_build_broadcast_scalar(res_bld, scalar);
+      }
+      return;
+   }
+
    for (unsigned c = 0; c < nc; c++) {
       LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, "");
       struct lp_build_loop_state loop_state;