nir/peephole_select: add options struct

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33590>
This commit is contained in:
Georg Lehmann 2025-02-17 21:34:10 +01:00 committed by Marge Bot
parent edd82bd03a
commit ca8147edbe
41 changed files with 343 additions and 100 deletions

View file

@ -184,7 +184,13 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)
}
NIR_LOOP_PASS_NOT_IDEMPOTENT(progress, skip, shader, nir_opt_if, nir_opt_if_optimize_phi_true_false);
NIR_LOOP_PASS(progress, skip, shader, nir_opt_cse);
NIR_LOOP_PASS(progress, skip, shader, nir_opt_peephole_select, 8, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 8,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_LOOP_PASS(progress, skip, shader, nir_opt_peephole_select, &peephole_select_options);
NIR_LOOP_PASS(progress, skip, shader, nir_opt_constant_folding);
NIR_LOOP_PASS(progress, skip, shader, nir_opt_intrinsics);
NIR_LOOP_PASS_NOT_IDEMPOTENT(progress, skip, shader, nir_opt_algebraic);
@ -219,7 +225,13 @@ radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad)
NIR_PASS(_, nir, nir_opt_dce);
NIR_PASS(_, nir, nir_opt_constant_folding);
NIR_PASS(_, nir, nir_opt_cse);
NIR_PASS(_, nir, nir_opt_peephole_select, 3, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 3,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(more_algebraic, nir, nir_opt_algebraic);
NIR_PASS(_, nir, nir_opt_generate_bfi);
NIR_PASS(_, nir, nir_opt_remove_phis);

View file

@ -59,7 +59,12 @@ optimize(nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 64,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_phi_precision);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);

View file

@ -2834,7 +2834,12 @@ agx_optimize_loop_nir(nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 64,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_phi_precision);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);
@ -3011,7 +3016,11 @@ agx_optimize_nir(nir_shader *nir, bool soft_fault, uint16_t *preamble_size)
*
* XXX: Set indirect_load_ok once we can investigate CTS flakes.
*/
NIR_PASS(_, nir, nir_opt_peephole_select, 64, false, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 64,
.expensive_alu_ok = true,
};
NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(_, nir, nir_opt_load_store_vectorize,
&(const nir_load_store_vectorize_options){
@ -3102,7 +3111,7 @@ agx_optimize_nir(nir_shader *nir, bool soft_fault, uint16_t *preamble_size)
*
* We need to lower int64 again to deal with the resulting 64-bit csels.
*/
NIR_PASS(_, nir, nir_opt_peephole_select, 64, false, true);
NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(_, nir, nir_lower_int64);
/* We need to lower fmin/fmax again after nir_opt_algebraic_late due to f2fmp

View file

@ -2163,8 +2163,18 @@ v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
NIR_PASS(progress, s, nir_opt_cse);
/* before peephole_select as it can generate 64 bit bcsels */
NIR_PASS(progress, s, nir_lower_64bit_phis);
NIR_PASS(progress, s, nir_opt_peephole_select, 0, false, false);
NIR_PASS(progress, s, nir_opt_peephole_select, 24, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 0,
};
NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options);
peephole_select_options = (nir_opt_peephole_select_options){
.limit = 24,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);

View file

@ -381,7 +381,10 @@ nir_load_libclc_shader(unsigned ptr_bit_size,
/* drivers run this pass, so don't be too aggressive. More aggressive
* values only increase effectiveness by <5%
*/
NIR_PASS(progress, nir, nir_opt_peephole_select, 0, false, false);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 0,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);
NIR_PASS(progress, nir, nir_opt_undef);

View file

@ -88,7 +88,13 @@ gl_nir_opts(nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_if, 0);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 8,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_phi_precision);
NIR_PASS(progress, nir, nir_opt_algebraic);

View file

@ -2857,7 +2857,10 @@ glsl_float64_funcs_to_nir(struct gl_context *ctx,
NIR_PASS(_, nir, nir_opt_dce);
NIR_PASS(_, nir, nir_opt_cse);
NIR_PASS(_, nir, nir_opt_gcm, true);
NIR_PASS(_, nir, nir_opt_peephole_select, 1, false, false);
nir_opt_peephole_select_options peephole_select_options = {};
peephole_select_options.limit = 1;
NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(_, nir, nir_opt_dce);
return nir;

View file

@ -6058,8 +6058,14 @@ typedef struct nir_opt_offsets_options {
bool nir_opt_offsets(nir_shader *shader, const nir_opt_offsets_options *options);
bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
bool indirect_load_ok, bool expensive_alu_ok);
/* Options for nir_opt_peephole_select.
 *
 * These are the driver defaults for the pass. For an if statement marked
 * nir_selection_control_flatten the pass overrides them and forces
 * indirect_load_ok and expensive_alu_ok to true.
 */
typedef struct nir_opt_peephole_select_options {
/* Budget compared against the count accumulated over the blocks of an if:
 * the if is left alone when that count exceeds the limit. A limit of 0
 * additionally disables the "ALU ok" state inside the block check. Set to
 * max (~0) to flatten all control flow; the pass uses that mode for
 * hardware without control flow.
 */
unsigned limit; /* Set to max to flatten all control flow. */
/* When false, a block is rejected if it contains a deref with an indirect,
 * or a UBO / global-constant load whose checked source is not constant.
 */
bool indirect_load_ok;
/* When false, a block is rejected if it contains one of the ALU opcodes the
 * pass treats as expensive (the integer divide/remainder opcodes idiv, irem
 * and udiv are among them).
 */
bool expensive_alu_ok;
} nir_opt_peephole_select_options;
bool nir_opt_peephole_select(nir_shader *shader,
const nir_opt_peephole_select_options *options);
bool nir_opt_reassociate_bfi(nir_shader *shader);

View file

@ -143,7 +143,10 @@ shader_only_allowed_outputs_use_view_index(nir_shader *shader,
/* Peephole select will drop if-blocks that have then and else empty,
* which will remove the usage of an SSA in the condition.
*/
progress |= nir_opt_peephole_select(shader_no_position, 0, false, false);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 0,
};
progress |= nir_opt_peephole_select(shader_no_position, &peephole_select_options);
progress |= nir_opt_dce(shader_no_position);
} while (progress);

View file

@ -57,13 +57,12 @@
static bool
block_check_for_allowed_instrs(nir_block *block, unsigned *count,
unsigned limit, bool indirect_load_ok,
bool expensive_alu_ok)
const nir_opt_peephole_select_options *options)
{
bool alu_ok = limit != 0;
bool alu_ok = options->limit != 0;
/* Used on non-control-flow HW to flatten all IFs. */
if (limit == ~0) {
if (options->limit == ~0) {
nir_foreach_instr(instr, block) {
switch (instr->type) {
case nir_instr_type_alu:
@ -118,7 +117,7 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count,
* because that flow control may be trying to avoid invalid
* loads.
*/
if (!indirect_load_ok && nir_deref_instr_has_indirect(deref))
if (!options->indirect_load_ok && nir_deref_instr_has_indirect(deref))
return false;
break;
@ -131,7 +130,7 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count,
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ubo_vec4:
if (!indirect_load_ok && !nir_src_is_const(intrin->src[1]))
if (!options->indirect_load_ok && !nir_src_is_const(intrin->src[1]))
return false;
if (!(nir_intrinsic_access(intrin) & ACCESS_CAN_SPECULATE))
return false;
@ -139,7 +138,7 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count,
case nir_intrinsic_load_global_constant:
case nir_intrinsic_load_constant_agx:
if (!indirect_load_ok && !nir_src_is_const(intrin->src[0]))
if (!options->indirect_load_ok && !nir_src_is_const(intrin->src[0]))
return false;
if (!(nir_intrinsic_access(intrin) & ACCESS_CAN_SPECULATE))
return false;
@ -240,7 +239,7 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count,
case nir_op_idiv:
case nir_op_irem:
case nir_op_udiv:
if (!alu_ok || !expensive_alu_ok)
if (!alu_ok || !options->expensive_alu_ok)
return false;
break;
@ -281,6 +280,22 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count,
return true;
}
/* Compute the effective options for a single if statement.
 *
 * The result starts as a copy of the caller-provided options. When the if is
 * marked nir_selection_control_flatten, the driver defaults are overridden so
 * that the if is flattened whenever it is safe to do so: indirect loads and
 * expensive ALU instructions are allowed and the instruction limit is raised
 * to the largest value that does not select unsafe flattening.
 *
 * A caller-provided limit of UINT_MAX (~0) already requests flattening of all
 * control flow, so it is kept as-is; lowering it to UINT_MAX - 1 would make a
 * "flatten" hint less aggressive than the driver default.
 *
 * \param if_stmt  the if statement being considered
 * \param options  the options passed to the pass by the driver
 * \return the options to use for if_stmt, by value
 */
static nir_opt_peephole_select_options
get_options_for_if(nir_if *if_stmt,
                   const nir_opt_peephole_select_options *options)
{
   nir_opt_peephole_select_options if_options = *options;

   if (if_stmt->control == nir_selection_control_flatten) {
      /* Override driver defaults */
      if (if_options.limit != UINT_MAX) {
         /* Maximum without unsafe flattening. */
         if_options.limit = UINT_MAX - 1;
      }
      if_options.indirect_load_ok = true;
      if_options.expensive_alu_ok = true;
   }

   return if_options;
}
/* If we're moving discards out of the if for non-CF hardware, we need to add
* the if's condition to it
*/
@ -337,8 +352,8 @@ rewrite_discard_conds(nir_instr *instr, nir_def *if_cond, bool is_else)
*
*/
static bool
nir_opt_collapse_if(nir_if *if_stmt, nir_shader *shader, unsigned limit,
bool indirect_load_ok, bool expensive_alu_ok)
nir_opt_collapse_if(nir_if *if_stmt, nir_shader *shader,
const nir_opt_peephole_select_options *options)
{
/* the if has to be nested */
if (if_stmt->cf_node.parent->type != nir_cf_node_if)
@ -390,20 +405,14 @@ nir_opt_collapse_if(nir_if *if_stmt, nir_shader *shader, unsigned limit,
}
}
if (parent_if->control == nir_selection_control_flatten) {
/* Override driver defaults */
indirect_load_ok = true;
expensive_alu_ok = true;
}
/* check if the block before the nested if matches the requirements */
nir_block *first = nir_if_first_then_block(parent_if);
nir_opt_peephole_select_options if_options = get_options_for_if(parent_if, options);
unsigned count = 0;
if (!block_check_for_allowed_instrs(first, &count, limit,
indirect_load_ok, expensive_alu_ok))
if (!block_check_for_allowed_instrs(first, &count, &if_options))
return false;
if (count > limit && parent_if->control != nir_selection_control_flatten)
if (count > if_options.limit)
return false;
/* trivialize succeeding phis */
@ -442,8 +451,7 @@ nir_opt_collapse_if(nir_if *if_stmt, nir_shader *shader, unsigned limit,
static bool
nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
unsigned limit, bool indirect_load_ok,
bool expensive_alu_ok)
const nir_opt_peephole_select_options *options)
{
if (nir_cf_node_is_first(&block->cf_node))
return false;
@ -466,8 +474,7 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
nir_if *if_stmt = nir_cf_node_as_if(prev_node);
/* first, try to collapse the if */
if (nir_opt_collapse_if(if_stmt, shader, limit,
indirect_load_ok, expensive_alu_ok))
if (nir_opt_collapse_if(if_stmt, shader, options))
return true;
if (if_stmt->control == nir_selection_control_dont_flatten)
@ -481,21 +488,15 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
nir_if_last_else_block(if_stmt) != else_block)
return false;
if (if_stmt->control == nir_selection_control_flatten) {
/* Override driver defaults */
indirect_load_ok = true;
expensive_alu_ok = true;
}
nir_opt_peephole_select_options if_options = get_options_for_if(if_stmt, options);
/* ... and those blocks must only contain "allowed" instructions. */
unsigned count = 0;
if (!block_check_for_allowed_instrs(then_block, &count, limit,
indirect_load_ok, expensive_alu_ok) ||
!block_check_for_allowed_instrs(else_block, &count, limit,
indirect_load_ok, expensive_alu_ok))
if (!block_check_for_allowed_instrs(then_block, &count, &if_options) ||
!block_check_for_allowed_instrs(else_block, &count, &if_options))
return false;
if (count > limit && if_stmt->control != nir_selection_control_flatten)
if (count > if_options.limit)
return false;
/* At this point, we know that the previous CFG node is an if-then
@ -551,16 +552,14 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
}
static bool
nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit,
bool indirect_load_ok, bool expensive_alu_ok)
nir_opt_peephole_select_impl(nir_function_impl *impl,
const nir_opt_peephole_select_options *options)
{
nir_shader *shader = impl->function->shader;
bool progress = false;
nir_foreach_block_safe(block, impl) {
progress |= nir_opt_peephole_select_block(block, shader, limit,
indirect_load_ok,
expensive_alu_ok);
progress |= nir_opt_peephole_select_block(block, shader, options);
}
if (progress) {
@ -573,15 +572,13 @@ nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit,
}
bool
nir_opt_peephole_select(nir_shader *shader, unsigned limit,
bool indirect_load_ok, bool expensive_alu_ok)
nir_opt_peephole_select(nir_shader *shader,
const nir_opt_peephole_select_options *options)
{
bool progress = false;
nir_foreach_function_impl(impl, shader) {
progress |= nir_opt_peephole_select_impl(impl, limit,
indirect_load_ok,
expensive_alu_ok);
progress |= nir_opt_peephole_select_impl(impl, options);
}
return progress;

View file

@ -94,7 +94,12 @@ TEST_F(nir_opt_peephole_select_test, opt_load_ubo_no_speculate)
nir_index_blocks(main->impl);
EXPECT_EQ(main->impl->num_blocks, 4);
ASSERT_FALSE(nir_opt_peephole_select(bld.shader, 16, true, true));
nir_opt_peephole_select_options peephole_select_options = {
.limit = 16,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
ASSERT_FALSE(nir_opt_peephole_select(bld.shader, &peephole_select_options));
nir_validate_shader(bld.shader, NULL);
nir_index_blocks(main->impl);
@ -145,7 +150,12 @@ TEST_F(nir_opt_peephole_select_test, opt_load_ubo_speculate)
nir_index_blocks(main->impl);
EXPECT_EQ(main->impl->num_blocks, 4);
ASSERT_TRUE(nir_opt_peephole_select(bld.shader, 16, true, true));
nir_opt_peephole_select_options peephole_select_options = {
.limit = 16,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
ASSERT_TRUE(nir_opt_peephole_select(bld.shader, &peephole_select_options));
nir_validate_shader(bld.shader, NULL);
nir_index_blocks(main->impl);

View file

@ -96,7 +96,12 @@ optimize(nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 64,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_phi_precision);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);

View file

@ -337,7 +337,12 @@ ir3_optimize_loop(struct ir3_compiler *compiler,
progress |= OPT(s, nir_opt_gcm, true);
else if (gcm == 2)
progress |= OPT(s, nir_opt_gcm, false);
progress |= OPT(s, nir_opt_peephole_select, 16, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 16,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
progress |= OPT(s, nir_opt_peephole_select, &peephole_select_options);
progress |= OPT(s, nir_opt_intrinsics);
/* NOTE: GS lowering inserts an output var with varying slot that
* is larger than VARYING_SLOT_MAX (ie. GS_VERTEX_FLAGS_IR3),

View file

@ -3339,8 +3339,13 @@ ntt_optimize_nir(struct nir_shader *s, struct pipe_screen *screen,
NIR_PASS(progress, s, nir_opt_dead_write_vars);
NIR_PASS(progress, s, nir_opt_if, nir_opt_if_optimize_phi_true_false);
NIR_PASS(progress, s, nir_opt_peephole_select,
control_flow_depth == 0 ? ~0 : 8, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = control_flow_depth == 0 ? ~0 : 8,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);
nir_load_store_vectorize_options vectorize_opts = {

View file

@ -2416,7 +2416,13 @@ ttn_optimize_nir(nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 8,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_phi_precision);
NIR_PASS(progress, nir, nir_opt_algebraic);

View file

@ -160,7 +160,13 @@ etna_optimize_loop(nir_shader *s)
progress |= OPT(s, nir_copy_prop);
progress |= OPT(s, nir_opt_dce);
progress |= OPT(s, nir_opt_cse);
progress |= OPT(s, nir_opt_peephole_select, 16, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 16,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
progress |= OPT(s, nir_opt_peephole_select, &peephole_select_options);
progress |= OPT(s, nir_opt_intrinsics);
progress |= OPT(s, nir_opt_algebraic);
progress |= OPT(s, nir_opt_constant_folding);

View file

@ -65,7 +65,12 @@ ir2_optimize_loop(nir_shader *s)
progress |= OPT(s, nir_opt_dce);
progress |= OPT(s, nir_opt_cse);
/* progress |= OPT(s, nir_opt_gcm, true); */
progress |= OPT(s, nir_opt_peephole_select, UINT_MAX, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = UINT_MAX,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
progress |= OPT(s, nir_opt_peephole_select, &peephole_select_options);
progress |= OPT(s, nir_opt_intrinsics);
progress |= OPT(s, nir_opt_algebraic);
progress |= OPT(s, nir_opt_constant_folding);

View file

@ -206,8 +206,13 @@ i915_optimize_nir(struct nir_shader *s)
NIR_PASS(progress, s, nir_opt_cse);
NIR_PASS(progress, s, nir_opt_find_array_copies);
NIR_PASS(progress, s, nir_opt_if, nir_opt_if_optimize_phi_true_false);
NIR_PASS(progress, s, nir_opt_peephole_select, ~0 /* flatten all IFs. */,
true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = ~0, /* flatten all IFs. */
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_shrink_stores, true);

View file

@ -374,7 +374,11 @@ iris_ensure_indirect_generation_shader(struct iris_batch *batch)
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
NIR_PASS_V(nir, nir_opt_cse);
NIR_PASS_V(nir, nir_opt_gcm, true);
NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 1,
};
NIR_PASS_V(nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

View file

@ -136,7 +136,13 @@ lima_program_optimize_vs_nir(struct nir_shader *s)
NIR_PASS(progress, s, nir_opt_dce);
NIR_PASS(progress, s, nir_opt_dead_cf);
NIR_PASS(progress, s, nir_opt_cse);
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 8,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, lima_nir_lower_ftrunc);
NIR_PASS(progress, s, nir_opt_constant_folding);
@ -246,7 +252,13 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
NIR_PASS(progress, s, nir_opt_dce);
NIR_PASS(progress, s, nir_opt_dead_cf);
NIR_PASS(progress, s, nir_opt_cse);
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 8,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef);

View file

@ -190,7 +190,13 @@ r300_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
NIR_PASS(progress, s, nir_opt_if, nir_opt_if_optimize_phi_true_false);
if (is_r500)
nir_shader_intrinsics_pass(s, set_speculate, nir_metadata_control_flow, NULL);
NIR_PASS(progress, s, nir_opt_peephole_select, is_r500 ? 8 : ~0, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = is_r500 ? 8 : ~0,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options);
if (s->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS(progress, s, r300_nir_lower_bool_to_float_fs);
}

View file

@ -638,7 +638,13 @@ optimize_once(nir_shader *shader)
NIR_PASS(progress, shader, nir_opt_if, nir_opt_if_optimize_phi_true_false);
NIR_PASS(progress, shader, nir_opt_dead_cf);
NIR_PASS(progress, shader, nir_opt_cse);
NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 200,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, shader, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, shader, nir_opt_conditional_discard);
NIR_PASS(progress, shader, nir_opt_dce);

View file

@ -97,7 +97,13 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool has_arr
progress |= lower_alu_to_scalar | lower_phis_to_scalar;
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 8,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
/* Needed for algebraic lowering */
NIR_PASS(progress, nir, nir_opt_algebraic);

View file

@ -1500,7 +1500,13 @@ vc4_optimize_nir(struct nir_shader *s)
NIR_PASS(progress, s, nir_opt_dce);
NIR_PASS(progress, s, nir_opt_dead_cf);
NIR_PASS(progress, s, nir_opt_cse);
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 8,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);
if (lower_flrp != 0) {

View file

@ -1576,7 +1576,13 @@ optimize_nir(struct nir_shader *s, struct zink_shader *zs, bool can_shrink)
NIR_PASS(progress, s, nir_opt_dead_cf);
NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
NIR_PASS(progress, s, nir_opt_cse);
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 8,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef);

View file

@ -230,7 +230,13 @@ optimize(nir_shader *nir)
NIR_PASS(progress, nir, nir_copy_prop);
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 8,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);

View file

@ -588,7 +588,12 @@ fn opt_nir(nir: &mut NirShader, dev: &Device, has_explicit_types: bool) {
progress |= nir_pass!(nir, nir_opt_dead_cf);
progress |= nir_pass!(nir, nir_opt_remove_phis);
// we don't want to be too aggressive here, but it kills a bit of CFG
progress |= nir_pass!(nir, nir_opt_peephole_select, 8, true, true);
let peephole_select_options = nir_opt_peephole_select_options {
limit: 8,
indirect_load_ok: true,
expensive_alu_ok: true,
};
progress |= nir_pass!(nir, nir_opt_peephole_select, &peephole_select_options);
progress |= nir_pass!(
nir,
nir_lower_vec3_to_vec4,

View file

@ -877,8 +877,15 @@ brw_nir_optimize(nir_shader *nir,
* indices will nearly always be in bounds and the cost of the load is
* low. Therefore there shouldn't be a performance benefit to avoid it.
*/
LOOP_OPT(nir_opt_peephole_select, 0, true, false);
LOOP_OPT(nir_opt_peephole_select, 8, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 0,
.indirect_load_ok = true,
};
LOOP_OPT(nir_opt_peephole_select, &peephole_select_options);
peephole_select_options.limit = 8;
peephole_select_options.expensive_alu_ok = true;
LOOP_OPT(nir_opt_peephole_select, &peephole_select_options);
LOOP_OPT(nir_opt_intrinsics);
LOOP_OPT(nir_opt_idiv_const, 32);
@ -1839,8 +1846,14 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
* instruction from one of the branches of the if-statement, so now it
* might be under the threshold of conversion to bcsel.
*/
OPT(nir_opt_peephole_select, 0, false, false);
OPT(nir_opt_peephole_select, 1, false, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 0,
};
OPT(nir_opt_peephole_select, &peephole_select_options);
peephole_select_options.limit = 1;
peephole_select_options.expensive_alu_ok = true;
OPT(nir_opt_peephole_select, &peephole_select_options);
}
do {

View file

@ -31,7 +31,12 @@ optimize(nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 64,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_phi_precision);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);

View file

@ -663,9 +663,16 @@ elk_nir_optimize(nir_shader *nir, bool is_scalar,
const bool is_vec4_tessellation = !is_scalar &&
(nir->info.stage == MESA_SHADER_TESS_CTRL ||
nir->info.stage == MESA_SHADER_TESS_EVAL);
OPT(nir_opt_peephole_select, 0, !is_vec4_tessellation, false);
OPT(nir_opt_peephole_select, 8, !is_vec4_tessellation,
devinfo->ver >= 6);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 0,
.indirect_load_ok = !is_vec4_tessellation,
};
OPT(nir_opt_peephole_select, &peephole_select_options);
peephole_select_options.limit = 8;
peephole_select_options.expensive_alu_ok = devinfo->ver >= 6;
OPT(nir_opt_peephole_select, &peephole_select_options);
OPT(nir_opt_intrinsics);
OPT(nir_opt_idiv_const, 32);
@ -1421,9 +1428,16 @@ elk_postprocess_nir(nir_shader *nir, const struct elk_compiler *compiler,
const bool is_vec4_tessellation = !is_scalar &&
(nir->info.stage == MESA_SHADER_TESS_CTRL ||
nir->info.stage == MESA_SHADER_TESS_EVAL);
OPT(nir_opt_peephole_select, 0, is_vec4_tessellation, false);
OPT(nir_opt_peephole_select, 1, is_vec4_tessellation,
compiler->devinfo->ver >= 6);
/* NOTE(review): the positional calls this replaces passed
 * is_vec4_tessellation un-negated as indirect_load_ok, whereas
 * elk_nir_optimize passes !is_vec4_tessellation. This initializer uses the
 * negated form, so indirect_load_ok changes value in this function as part
 * of the options-struct conversion. Confirm that the change is intentional.
 */
nir_opt_peephole_select_options peephole_select_options = {
.limit = 0,
.indirect_load_ok = !is_vec4_tessellation,
};
OPT(nir_opt_peephole_select, &peephole_select_options);
peephole_select_options.limit = 1;
peephole_select_options.expensive_alu_ok = compiler->devinfo->ver >= 6;
OPT(nir_opt_peephole_select, &peephole_select_options);
}
do {

View file

@ -31,7 +31,12 @@ optimize(nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 64,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_phi_precision);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);

View file

@ -71,7 +71,11 @@ compile_shader(struct anv_device *device,
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
NIR_PASS_V(nir, nir_opt_cse);
NIR_PASS_V(nir, nir_opt_gcm, true);
NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 1,
};
NIR_PASS_V(nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

View file

@ -693,7 +693,11 @@ anv_load_fp64_shader(struct anv_device *device)
NIR_PASS_V(nir, nir_opt_dce);
NIR_PASS_V(nir, nir_opt_cse);
NIR_PASS_V(nir, nir_opt_gcm, true);
NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 1,
};
NIR_PASS_V(nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS_V(nir, nir_opt_dce);
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp,

View file

@ -855,7 +855,13 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_remove_phis);
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 8,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform);
NIR_PASS(progress, nir, nir_opt_memcpy);
} while (progress);

View file

@ -6326,7 +6326,13 @@ optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
nir_opt_if_optimize_phi_true_false | nir_opt_if_avoid_64bit_phis);
NIR_PASS(progress, s, nir_opt_dead_cf);
NIR_PASS(progress, s, nir_opt_cse);
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 8,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, dxil_nir_algebraic);
if (s->options->lower_int64_options)

View file

@ -136,7 +136,10 @@ optimize_nir(nir_shader *nir, const struct nak_compiler *nak, bool allow_copies)
OPT(nir, nir_opt_dce);
OPT(nir, nir_opt_cse);
OPT(nir, nir_opt_peephole_select, 0, false, false);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 0,
};
OPT(nir, nir_opt_peephole_select, &peephole_select_options);
OPT(nir, nir_opt_intrinsics);
OPT(nir, nir_opt_idiv_const, 32);
OPT(nir, nir_opt_algebraic);

View file

@ -164,7 +164,12 @@ nvk_cg_optimize_nir(nir_shader *nir)
* but a bunch of tessellation shaders blow up.
* we should revisit this when NAK is merged.
*/
NIR_PASS(progress, nir, nir_opt_peephole_select, 2, true, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 2,
.indirect_load_ok = true,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_constant_folding);
NIR_PASS(progress, nir, nir_opt_algebraic);

View file

@ -68,7 +68,12 @@ optimize(nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 64,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_phi_precision);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);

View file

@ -5123,7 +5123,12 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 64,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);

View file

@ -502,7 +502,12 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 64,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);
NIR_PASS(progress, nir, nir_opt_undef);

View file

@ -589,7 +589,12 @@ lower_load_push_consts(nir_shader *nir, struct panvk_shader *shader)
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 64,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);
} while (progress);