mirror of https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 15:20:10 +01:00
nir/opt_offsets: add callback to set need_nuw per intrinsic
Whether need_nuw is used is currently decided in two different ways:

- globally, through the allow_offset_wrap option;
- per intrinsic, but hard-coded in opt_offsets.

Make this more flexible by creating a callback that is called per
intrinsic. This allows backends to decide, on a per-intrinsic basis,
whether need_nuw is needed.

Note that the main use case for ir3 is to add opt_offsets support for
global memory accesses: other intrinsics don't need need_nuw, but global
memory accesses do.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37114>
This commit is contained in:
parent
bc03086320
commit
e78bd88a06
5 changed files with 33 additions and 16 deletions
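For context before the hunks, here is a minimal sketch of the new hook from a backend's perspective. It is illustrative only and not part of this commit: the my_backend_* name and the load/store_global cases are hypothetical (the ir3 callback added in the last hunk simply returns true); the sketch assumes only the nir_opt_offsets_options fields introduced by this change.

/* Minimal sketch (not from this commit): a hypothetical backend callback
 * that forbids offset wrapping only for global memory accesses, the ir3
 * use case named in the commit message. Returning false here makes
 * opt_offsets compute need_nuw = true for that intrinsic. */
static bool
my_backend_allow_offset_wrap(nir_intrinsic_instr *intr, const void *data)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_global:
   case nir_intrinsic_store_global:
      return false; /* global accesses need no-unsigned-wrap offsets */
   default:
      return true;  /* everything else may fold wrapping additions */
   }
}

/* Installed through the options struct, next to the existing cb_data: */
const nir_opt_offsets_options offset_options = {
   .allow_offset_wrap_cb = my_backend_allow_offset_wrap,
   .cb_data = NULL, /* passed through as the callback's data argument */
};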
@@ -6339,7 +6339,7 @@ typedef struct nir_opt_offsets_options {
    * Allow the offset calculation to wrap. If false, constant additions that
    * might wrap will not be folded into the offset.
    */
-   bool allow_offset_wrap;
+   bool (*allow_offset_wrap_cb)(nir_intrinsic_instr *intr, const void *data);
 } nir_opt_offsets_options;
 
 bool nir_opt_offsets(nir_shader *shader, const nir_opt_offsets_options *options);
@@ -57,7 +57,7 @@ try_extract_const_addition(nir_builder *b, nir_scalar val, opt_offsets_state *st
    * Ignored for ints-as-floats (lower_bitops is a proxy for that), where
    * unsigned wrapping doesn't make sense.
    */
-   if (!state->options->allow_offset_wrap && need_nuw && !alu->no_unsigned_wrap &&
+   if (need_nuw && !alu->no_unsigned_wrap &&
        !b->shader->options->lower_bitops) {
       if (!state->range_ht) {
          /* Cache for nir_unsigned_upper_bound */
@@ -188,6 +188,14 @@ get_max(opt_offsets_state *state, nir_intrinsic_instr *intrin, uint32_t default_
    return 0;
 }
 
+static bool
+allow_offset_wrap(opt_offsets_state *state, nir_intrinsic_instr *intr)
+{
+   if (state->options->allow_offset_wrap_cb)
+      return state->options->allow_offset_wrap_cb(intr, state->options->cb_data);
+   return false;
+}
+
 static bool
 process_instr(nir_builder *b, nir_instr *instr, void *s)
 {
@@ -196,42 +204,43 @@ process_instr(nir_builder *b, nir_instr *instr, void *s)
 
    opt_offsets_state *state = (opt_offsets_state *)s;
    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   bool need_nuw = !allow_offset_wrap(state, intrin);
 
    switch (intrin->intrinsic) {
    case nir_intrinsic_load_uniform:
    case nir_intrinsic_load_const_ir3:
-      return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->uniform_max), true);
+      return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->uniform_max), need_nuw);
    case nir_intrinsic_load_ubo_vec4:
-      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->ubo_vec4_max), true);
+      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->ubo_vec4_max), need_nuw);
    case nir_intrinsic_shared_atomic:
    case nir_intrinsic_shared_atomic_swap:
-      return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->shared_atomic_max), true);
+      return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->shared_atomic_max), need_nuw);
    case nir_intrinsic_load_shared:
    case nir_intrinsic_load_shared_ir3:
-      return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->shared_max), true);
+      return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->shared_max), need_nuw);
    case nir_intrinsic_store_shared:
    case nir_intrinsic_store_shared_ir3:
-      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->shared_max), true);
+      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->shared_max), need_nuw);
    case nir_intrinsic_load_shared2_amd:
       return try_fold_shared2(b, intrin, state, 0);
    case nir_intrinsic_store_shared2_amd:
       return try_fold_shared2(b, intrin, state, 1);
    case nir_intrinsic_load_buffer_amd:
-      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->buffer_max),
-                                 nir_intrinsic_access(intrin) & ACCESS_IS_SWIZZLED_AMD);
+      need_nuw &= !!(nir_intrinsic_access(intrin) & ACCESS_IS_SWIZZLED_AMD);
+      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->buffer_max), need_nuw);
    case nir_intrinsic_store_buffer_amd:
-      return try_fold_load_store(b, intrin, state, 2, get_max(state, intrin, state->options->buffer_max),
-                                 nir_intrinsic_access(intrin) & ACCESS_IS_SWIZZLED_AMD);
+      need_nuw &= !!(nir_intrinsic_access(intrin) & ACCESS_IS_SWIZZLED_AMD);
+      return try_fold_load_store(b, intrin, state, 2, get_max(state, intrin, state->options->buffer_max), need_nuw);
    case nir_intrinsic_load_ssbo_intel:
    case nir_intrinsic_load_ssbo_uniform_block_intel:
    case nir_intrinsic_load_ubo_uniform_block_intel:
-      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->buffer_max), true);
+      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->buffer_max), need_nuw);
    case nir_intrinsic_store_ssbo_intel:
-      return try_fold_load_store(b, intrin, state, 2, get_max(state, intrin, state->options->buffer_max), true);
+      return try_fold_load_store(b, intrin, state, 2, get_max(state, intrin, state->options->buffer_max), need_nuw);
    case nir_intrinsic_load_ssbo_ir3:
-      return try_fold_load_store(b, intrin, state, 2, get_max(state, intrin, state->options->buffer_max), true);
+      return try_fold_load_store(b, intrin, state, 2, get_max(state, intrin, state->options->buffer_max), need_nuw);
    case nir_intrinsic_store_ssbo_ir3:
-      return try_fold_load_store(b, intrin, state, 3, get_max(state, intrin, state->options->buffer_max), true);
+      return try_fold_load_store(b, intrin, state, 3, get_max(state, intrin, state->options->buffer_max), need_nuw);
    default:
       return false;
    }
@@ -387,7 +387,7 @@ ir3_optimize_loop(struct ir3_compiler *compiler,
          .buffer_max = 0,
          .max_offset_cb = ir3_nir_max_imm_offset,
          .cb_data = compiler,
-         .allow_offset_wrap = true,
+         .allow_offset_wrap_cb = ir3_nir_allow_base_offset_wrap,
       };
       progress |= OPT(s, nir_opt_offsets, &offset_options);
 
@@ -196,6 +196,8 @@ is_intrinsic_load(nir_intrinsic_op op)
 }
 
 uint32_t ir3_nir_max_imm_offset(nir_intrinsic_instr *intrin, const void *data);
+bool ir3_nir_allow_base_offset_wrap(nir_intrinsic_instr *intrin,
+                                    const void *data);
 unsigned ir3_nir_max_offset_shift(nir_intrinsic_instr *intr, const void *data);
 
 /* TODO: make this a common NIR helper?
@@ -335,6 +335,12 @@ ir3_nir_max_imm_offset(nir_intrinsic_instr *intrin, const void *data)
    }
 }
 
+bool
+ir3_nir_allow_base_offset_wrap(nir_intrinsic_instr *intrin, const void *data)
+{
+   return true;
+}
+
 unsigned
 ir3_nir_max_offset_shift(nir_intrinsic_instr *intr, const void *data)
 {