mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-06 02:20:11 +01:00
gallivm/nir: Add a short circuit uniform-offset mode for load_global.
If we know the offset is constant, we don't have to ask LLVM to loop over the elements pulling the same value out over and over. This doesn't seem to have produced a win in the testcase I was looking at, but it was an easier entrypoint to figuring out how to do scalar memory access than load_memory, and will probably affect some workload. Reviewed-by: Dave Airlie <airlied@redhat.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14999>
This commit is contained in:
parent
d74606d440
commit
181f25aff4
3 changed files with 19 additions and 1 deletions
|
|
@ -1695,9 +1695,10 @@ static void visit_load_global(struct lp_build_nir_context *bld_base,
|
|||
nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
|
||||
{
|
||||
LLVMValueRef addr = get_src(bld_base, instr->src[0]);
|
||||
bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
|
||||
bld_base->load_global(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
|
||||
nir_src_bit_size(instr->src[0]),
|
||||
addr, result);
|
||||
offset_is_uniform, addr, result);
|
||||
}
|
||||
|
||||
static void visit_store_global(struct lp_build_nir_context *bld_base,
|
||||
|
|
|
|||
|
|
@ -91,6 +91,7 @@ struct lp_build_nir_context
|
|||
void (*load_global)(struct lp_build_nir_context *bld_base,
|
||||
unsigned nc, unsigned bit_size,
|
||||
unsigned offset_bit_size,
|
||||
bool offset_is_global,
|
||||
LLVMValueRef offset, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
|
||||
|
||||
void (*store_global)(struct lp_build_nir_context *bld_base,
|
||||
|
|
|
|||
|
|
@ -816,6 +816,7 @@ static void emit_load_global(struct lp_build_nir_context *bld_base,
|
|||
unsigned nc,
|
||||
unsigned bit_size,
|
||||
unsigned addr_bit_size,
|
||||
bool offset_is_uniform,
|
||||
LLVMValueRef addr,
|
||||
LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
|
||||
{
|
||||
|
|
@ -827,6 +828,21 @@ static void emit_load_global(struct lp_build_nir_context *bld_base,
|
|||
|
||||
res_bld = get_int_bld(bld_base, true, bit_size);
|
||||
|
||||
if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
|
||||
/* If the offset is uniform, then use the address from invocation 0 to
|
||||
* load, and broadcast to all invocations.
|
||||
*/
|
||||
LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
|
||||
lp_build_const_int32(gallivm, 0), "");
|
||||
addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
|
||||
|
||||
for (unsigned c = 0; c < nc; c++) {
|
||||
LLVMValueRef scalar = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));
|
||||
outval[c] = lp_build_broadcast_scalar(res_bld, scalar);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for (unsigned c = 0; c < nc; c++) {
|
||||
LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, "");
|
||||
struct lp_build_loop_state loop_state;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue