mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-15 00:28:28 +02:00
gallivm/nir/soa: Implement robust uniform loads without control flow
Loading from a "zero" address instead of branching around out-of-range accesses speeds up compilation. Reviewed-by: Mike Blumenkrantz <michael.blumenkrantz@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32963>
This commit is contained in:
parent
178e91956c
commit
29a4886cc8
1 changed file with 29 additions and 30 deletions
|
|
@ -89,6 +89,8 @@ struct lp_build_nir_soa_context
|
|||
LLVMValueRef resources_ptr;
|
||||
LLVMTypeRef thread_data_type;
|
||||
LLVMValueRef thread_data_ptr;
|
||||
LLVMValueRef null_qword_ptr;
|
||||
LLVMValueRef noop_store_ptr;
|
||||
|
||||
LLVMValueRef ssbo_ptr;
|
||||
|
||||
|
|
@ -1153,24 +1155,15 @@ static void emit_load_ubo(struct lp_build_nir_soa_context *bld,
|
|||
break;
|
||||
default: break;
|
||||
}
|
||||
|
||||
for (unsigned c = 0; c < nc; c++) {
|
||||
LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
|
||||
LLVMValueRef in_range = lp_offset_in_range(bld, chan_offset, num_consts);
|
||||
LLVMValueRef ptr = LLVMBuildGEP2(builder, bld_broad->elem_type, consts_ptr, &chan_offset, 1, "");
|
||||
LLVMValueRef null_ptr = LLVMBuildBitCast(builder, bld->null_qword_ptr, LLVMTypeOf(ptr), "");
|
||||
ptr = LLVMBuildSelect(builder, in_range, ptr, null_ptr, "");
|
||||
|
||||
LLVMValueRef scalar;
|
||||
/* If loading outside the UBO, we need to skip the load and read 0 instead. */
|
||||
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
|
||||
LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
|
||||
LLVMBuildStore(builder, zero, res_store);
|
||||
|
||||
struct lp_build_if_state ifthen;
|
||||
lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld, chan_offset, num_consts));
|
||||
LLVMBuildStore(builder, lp_build_pointer_get2(builder, bld_broad->elem_type,
|
||||
consts_ptr, chan_offset), res_store);
|
||||
lp_build_endif(&ifthen);
|
||||
|
||||
scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");
|
||||
|
||||
result[c] = lp_build_broadcast_scalar(load_bld, scalar);
|
||||
result[c] = LLVMBuildLoad2(builder, load_bld->elem_type, ptr, "");
|
||||
}
|
||||
} else {
|
||||
LLVMValueRef overflow_mask;
|
||||
|
|
@ -1321,16 +1314,12 @@ static void emit_load_mem(struct lp_build_nir_soa_context *bld,
|
|||
LLVMValueRef scalar;
|
||||
/* If loading outside the SSBO, we need to skip the load and read 0 instead. */
|
||||
if (ssbo_limit) {
|
||||
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
|
||||
LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
|
||||
LLVMBuildStore(builder, zero, res_store);
|
||||
LLVMValueRef in_range = lp_offset_in_range(bld, chan_offset, ssbo_limit);
|
||||
LLVMValueRef ptr = LLVMBuildGEP2(builder, load_bld->elem_type, mem_ptr, &chan_offset, 1, "");
|
||||
LLVMValueRef null_ptr = LLVMBuildBitCast(builder, bld->null_qword_ptr, LLVMTypeOf(ptr), "");
|
||||
ptr = LLVMBuildSelect(builder, in_range, ptr, null_ptr, "");
|
||||
|
||||
struct lp_build_if_state ifthen;
|
||||
lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld, chan_offset, ssbo_limit));
|
||||
LLVMBuildStore(builder, lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset), res_store);
|
||||
lp_build_endif(&ifthen);
|
||||
|
||||
scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");
|
||||
scalar = LLVMBuildLoad2(builder, load_bld->elem_type, ptr, "");
|
||||
} else {
|
||||
scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset);
|
||||
}
|
||||
|
|
@ -1414,12 +1403,20 @@ static void emit_store_mem(struct lp_build_nir_soa_context *bld,
|
|||
|
||||
offset = lp_build_shr_imm(uint_bld, offset, shift_val);
|
||||
|
||||
LLVMValueRef exec_mask = mask_vec(bld);
|
||||
LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, bld->uint_bld.zero, "");
|
||||
|
||||
/* If the address is uniform, then just store the value from the first
|
||||
* channel instead of making LLVM unroll the invocation loop. Note that we
|
||||
* don't use first_active_uniform(), since we aren't guaranteed that there is
|
||||
* actually an active invocation.
|
||||
*/
|
||||
if (index_and_offset_are_uniform && invocation_0_must_be_active(bld)) {
|
||||
cond = LLVMBuildBitCast(builder, cond, LLVMIntTypeInContext(gallivm->context, bld->base.type.length), "exec_bitmask");
|
||||
cond = LLVMBuildZExt(builder, cond, bld->int_bld.elem_type, "");
|
||||
|
||||
LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, cond, lp_build_const_int32(gallivm, 0), "any_active");
|
||||
|
||||
LLVMValueRef ssbo_limit;
|
||||
LLVMValueRef mem_ptr = mem_access_base_pointer(bld, store_bld, bit_size, payload, index,
|
||||
lp_build_const_int32(gallivm, 0), &ssbo_limit);
|
||||
|
|
@ -1440,10 +1437,11 @@ static void emit_store_mem(struct lp_build_nir_soa_context *bld,
|
|||
|
||||
/* If storing outside the SSBO, we need to skip the store instead. */
|
||||
if (ssbo_limit) {
|
||||
struct lp_build_if_state ifthen;
|
||||
lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld, chan_offset, ssbo_limit));
|
||||
lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
|
||||
lp_build_endif(&ifthen);
|
||||
LLVMValueRef valid_store = LLVMBuildAnd(builder, lp_offset_in_range(bld, chan_offset, ssbo_limit), any_active, "");
|
||||
LLVMValueRef ptr = LLVMBuildGEP2(builder, store_bld->elem_type, mem_ptr, &chan_offset, 1, "");
|
||||
LLVMValueRef noop_ptr = LLVMBuildBitCast(builder, bld->noop_store_ptr, LLVMTypeOf(ptr), "");
|
||||
ptr = LLVMBuildSelect(builder, valid_store, ptr, noop_ptr, "");
|
||||
LLVMBuildStore(builder, value_ptr, ptr);
|
||||
} else {
|
||||
lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
|
||||
}
|
||||
|
|
@ -1451,8 +1449,6 @@ static void emit_store_mem(struct lp_build_nir_soa_context *bld,
|
|||
return;
|
||||
}
|
||||
|
||||
LLVMValueRef exec_mask = mask_vec(bld);
|
||||
LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
|
||||
for (unsigned i = 0; i < uint_bld->type.length; i++) {
|
||||
LLVMValueRef counter = lp_build_const_int32(gallivm, i);
|
||||
LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");
|
||||
|
|
@ -5897,6 +5893,9 @@ void lp_build_nir_soa_func(struct gallivm_state *gallivm,
|
|||
bld.call_context_ptr = params->call_context_ptr;
|
||||
}
|
||||
|
||||
bld.null_qword_ptr = lp_build_alloca(gallivm, bld.uint64_bld.elem_type, "null_qword_ptr");
|
||||
bld.noop_store_ptr = lp_build_alloca_undef(gallivm, bld.uint64_bld.elem_type, "noop_store_ptr");
|
||||
|
||||
emit_prologue(&bld);
|
||||
|
||||
nir_foreach_shader_out_variable(variable, shader)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue