diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ea8a5d4ad67..48260c51643 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -6339,7 +6339,7 @@ typedef struct nir_opt_offsets_options {
     * Allow the offset calculation to wrap. If false, constant additions that
     * might wrap will not be folded into the offset.
     */
-   bool allow_offset_wrap;
+   bool (*allow_offset_wrap_cb)(nir_intrinsic_instr *intr, const void *data);
 } nir_opt_offsets_options;
 
 bool nir_opt_offsets(nir_shader *shader, const nir_opt_offsets_options *options);
diff --git a/src/compiler/nir/nir_opt_offsets.c b/src/compiler/nir/nir_opt_offsets.c
index 92efeaf4ac2..7c5e7531e2d 100644
--- a/src/compiler/nir/nir_opt_offsets.c
+++ b/src/compiler/nir/nir_opt_offsets.c
@@ -57,7 +57,7 @@ try_extract_const_addition(nir_builder *b, nir_scalar val, opt_offsets_state *st
     * Ignored for ints-as-floats (lower_bitops is a proxy for that), where
     * unsigned wrapping doesn't make sense.
     */
-   if (!state->options->allow_offset_wrap && need_nuw && !alu->no_unsigned_wrap &&
+   if (need_nuw && !alu->no_unsigned_wrap &&
        !b->shader->options->lower_bitops) {
       if (!state->range_ht) {
          /* Cache for nir_unsigned_upper_bound */
@@ -188,6 +188,14 @@ get_max(opt_offsets_state *state, nir_intrinsic_instr *intrin, uint32_t default_
    return 0;
 }
 
+static bool
+allow_offset_wrap(opt_offsets_state *state, nir_intrinsic_instr *intr)
+{
+   if (state->options->allow_offset_wrap_cb)
+      return state->options->allow_offset_wrap_cb(intr, state->options->cb_data);
+   return false;
+}
+
 static bool
 process_instr(nir_builder *b, nir_instr *instr, void *s)
 {
@@ -196,42 +204,43 @@ process_instr(nir_builder *b, nir_instr *instr, void *s)
 
    opt_offsets_state *state = (opt_offsets_state *)s;
    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   bool need_nuw = !allow_offset_wrap(state, intrin);
 
    switch (intrin->intrinsic) {
    case nir_intrinsic_load_uniform:
    case nir_intrinsic_load_const_ir3:
-      return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->uniform_max), true);
+      return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->uniform_max), need_nuw);
    case nir_intrinsic_load_ubo_vec4:
-      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->ubo_vec4_max), true);
+      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->ubo_vec4_max), need_nuw);
    case nir_intrinsic_shared_atomic:
    case nir_intrinsic_shared_atomic_swap:
-      return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->shared_atomic_max), true);
+      return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->shared_atomic_max), need_nuw);
    case nir_intrinsic_load_shared:
    case nir_intrinsic_load_shared_ir3:
-      return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->shared_max), true);
+      return try_fold_load_store(b, intrin, state, 0, get_max(state, intrin, state->options->shared_max), need_nuw);
    case nir_intrinsic_store_shared:
    case nir_intrinsic_store_shared_ir3:
-      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->shared_max), true);
+      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->shared_max), need_nuw);
    case nir_intrinsic_load_shared2_amd:
       return try_fold_shared2(b, intrin, state, 0);
    case nir_intrinsic_store_shared2_amd:
       return try_fold_shared2(b, intrin, state, 1);
    case nir_intrinsic_load_buffer_amd:
-      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->buffer_max),
-                                 nir_intrinsic_access(intrin) & ACCESS_IS_SWIZZLED_AMD);
+      need_nuw &= !!(nir_intrinsic_access(intrin) & ACCESS_IS_SWIZZLED_AMD);
+      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->buffer_max), need_nuw);
    case nir_intrinsic_store_buffer_amd:
-      return try_fold_load_store(b, intrin, state, 2, get_max(state, intrin, state->options->buffer_max),
-                                 nir_intrinsic_access(intrin) & ACCESS_IS_SWIZZLED_AMD);
+      need_nuw &= !!(nir_intrinsic_access(intrin) & ACCESS_IS_SWIZZLED_AMD);
+      return try_fold_load_store(b, intrin, state, 2, get_max(state, intrin, state->options->buffer_max), need_nuw);
    case nir_intrinsic_load_ssbo_intel:
    case nir_intrinsic_load_ssbo_uniform_block_intel:
    case nir_intrinsic_load_ubo_uniform_block_intel:
-      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->buffer_max), true);
+      return try_fold_load_store(b, intrin, state, 1, get_max(state, intrin, state->options->buffer_max), need_nuw);
    case nir_intrinsic_store_ssbo_intel:
-      return try_fold_load_store(b, intrin, state, 2, get_max(state, intrin, state->options->buffer_max), true);
+      return try_fold_load_store(b, intrin, state, 2, get_max(state, intrin, state->options->buffer_max), need_nuw);
    case nir_intrinsic_load_ssbo_ir3:
-      return try_fold_load_store(b, intrin, state, 2, get_max(state, intrin, state->options->buffer_max), true);
+      return try_fold_load_store(b, intrin, state, 2, get_max(state, intrin, state->options->buffer_max), need_nuw);
    case nir_intrinsic_store_ssbo_ir3:
-      return try_fold_load_store(b, intrin, state, 3, get_max(state, intrin, state->options->buffer_max), true);
+      return try_fold_load_store(b, intrin, state, 3, get_max(state, intrin, state->options->buffer_max), need_nuw);
    default:
       return false;
    }
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 812b09856ad..d58f715e2e2 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -387,7 +387,7 @@ ir3_optimize_loop(struct ir3_compiler *compiler,
       .buffer_max = 0,
       .max_offset_cb = ir3_nir_max_imm_offset,
       .cb_data = compiler,
-      .allow_offset_wrap = true,
+      .allow_offset_wrap_cb = ir3_nir_allow_base_offset_wrap,
    };
 
    progress |= OPT(s, nir_opt_offsets, &offset_options);
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index ddf58703381..a3a475bcf8e 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -196,6 +196,8 @@ is_intrinsic_load(nir_intrinsic_op op)
 }
 
 uint32_t ir3_nir_max_imm_offset(nir_intrinsic_instr *intrin, const void *data);
+bool ir3_nir_allow_base_offset_wrap(nir_intrinsic_instr *intrin,
+                                    const void *data);
 unsigned ir3_nir_max_offset_shift(nir_intrinsic_instr *intr, const void *data);
 
 /* TODO: make this a common NIR helper?
diff --git a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
index a9ebfbff79e..7143d1d5909 100644
--- a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
+++ b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
@@ -335,6 +335,12 @@ ir3_nir_max_imm_offset(nir_intrinsic_instr *intrin, const void *data)
    }
 }
 
+bool
+ir3_nir_allow_base_offset_wrap(nir_intrinsic_instr *intrin, const void *data)
+{
+   return true;
+}
+
 unsigned
 ir3_nir_max_offset_shift(nir_intrinsic_instr *intr, const void *data)
 {
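---

Reviewer note: the old boolean was all-or-nothing per nir_opt_offsets() invocation; the callback lets a backend decide per intrinsic (ir3 keeps its previous behaviour by returning true unconditionally, and leaving allow_offset_wrap_cb NULL preserves the conservative default, since allow_offset_wrap() then returns false and need_nuw stays set). As a minimal sketch of what a more selective backend could do with this, assuming a hypothetical driver that only tolerates wrapping offset arithmetic on shared-memory access (the function name and the intrinsic choices are illustrative, not part of this patch):

/* Hypothetical example, not part of this patch: allow folded offsets to
 * wrap only for shared-memory intrinsics; everything else keeps requiring
 * a no-unsigned-wrap guarantee before constant additions are folded in. */
static bool
example_allow_offset_wrap(nir_intrinsic_instr *intr, const void *data)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_shared:
   case nir_intrinsic_store_shared:
      return true;
   default:
      return false;
   }
}

Wired up the same way ir3 does it in ir3_optimize_loop():

   const nir_opt_offsets_options offset_options = {
      .shared_max = ~0u,
      .allow_offset_wrap_cb = example_allow_offset_wrap,
   };
   nir_opt_offsets(shader, &offset_options);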