nak: Use NIR_LOOP_PASS

This is similar to
75ede9d9bc ("intel/brw: track last successful pass and leave the loop early")
except that it uses the common nir helpers.

Note that I've also marked nir_opt_peephole_select as NOT_IDEMPOTENT
because I'm skeptical that it actually is idempotent. This differs from
both brw and radv.

I'm also marking gcm as not idempotent because it isn't idempotent in
practice on one of the shaders in my shader-db:
2bf4ba7133/fossils/blender
pipeline hash 0e972f8e349af903

This is about a 4% geomean compile time speedup on my local collection
of shaders.

Reviewed-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41118>
This commit is contained in:
Mel Henning 2026-04-22 18:33:54 -04:00 committed by Marge Bot
parent 75fc9e2704
commit 4b0a0ed7b6

View file

@ -10,15 +10,19 @@
#include "util/u_math.h"
#define OPT(nir, pass, ...) ({ \
#define _OPT(pass_macro, ...) ({ \
bool this_progress = false; \
NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
pass_macro(this_progress, ##__VA_ARGS__); \
if (this_progress) \
progress = true; \
this_progress; \
})
#define OPT(nir, pass, ...) _OPT(NIR_PASS, nir, pass, ##__VA_ARGS__)
#define OPT_V(nir, pass, ...) NIR_PASS(_, nir, pass, ##__VA_ARGS__)
#define LOOP_OPT(...) _OPT(NIR_LOOP_PASS, skip, ##__VA_ARGS__)
#define LOOP_OPT_NOT_IDEMPOTENT(...) \
_OPT(NIR_LOOP_PASS_NOT_IDEMPOTENT, skip, ##__VA_ARGS__)
nir_def *
nak_nir_load_sysval(nir_builder *b, enum nak_sv idx,
@ -139,6 +143,7 @@ void
nak_optimize_nir(nir_shader *nir, const struct nak_compiler *nak)
{
bool progress;
struct set *skip = _mesa_pointer_set_create(NULL);
unsigned lower_flrp =
(nir->options->lower_flrp16 ? 16 : 0) |
@ -155,72 +160,76 @@ nak_optimize_nir(nir_shader *nir, const struct nak_compiler *nak)
* code.
*/
if (nir->info.stage != MESA_SHADER_KERNEL)
OPT(nir, nir_split_array_vars, nir_var_function_temp);
LOOP_OPT(nir, nir_split_array_vars, nir_var_function_temp);
OPT(nir, nir_shrink_vec_array_vars, nir_var_function_temp);
OPT(nir, nir_opt_deref);
if (OPT(nir, nir_opt_memcpy))
OPT(nir, nir_split_var_copies);
LOOP_OPT(nir, nir_shrink_vec_array_vars, nir_var_function_temp);
LOOP_OPT(nir, nir_opt_deref);
if (LOOP_OPT(nir, nir_opt_memcpy))
LOOP_OPT(nir, nir_split_var_copies);
OPT(nir, nir_lower_vars_to_ssa);
LOOP_OPT(nir, nir_lower_vars_to_ssa);
if (!nir->info.var_copies_lowered) {
/* Only run this pass if nir_lower_var_copies was not called
* yet. That would lower away any copy_deref instructions and we
* don't want to introduce any more.
*/
OPT(nir, nir_opt_find_array_copies);
LOOP_OPT(nir, nir_opt_find_array_copies);
}
OPT(nir, nir_opt_copy_prop_vars);
OPT(nir, nir_opt_dead_write_vars);
OPT(nir, nir_opt_combine_stores, nir_var_all);
LOOP_OPT(nir, nir_opt_copy_prop_vars);
LOOP_OPT(nir, nir_opt_dead_write_vars);
LOOP_OPT(nir, nir_opt_combine_stores, nir_var_all);
OPT(nir, nir_lower_alu_width, vectorize_filter_cb, nak);
OPT(nir, nir_opt_vectorize, vectorize_filter_cb, (void*)nak);
OPT(nir, nir_lower_phis_to_scalar, phi_vectorize_cb, NULL);
OPT(nir, nir_lower_frexp);
OPT(nir, nir_opt_copy_prop);
OPT(nir, nir_opt_dce);
OPT(nir, nir_opt_cse);
LOOP_OPT(nir, nir_lower_alu_width, vectorize_filter_cb, nak);
LOOP_OPT(nir, nir_opt_vectorize, vectorize_filter_cb, (void*)nak);
LOOP_OPT(nir, nir_lower_phis_to_scalar, phi_vectorize_cb, NULL);
LOOP_OPT(nir, nir_lower_frexp);
LOOP_OPT(nir, nir_opt_copy_prop);
LOOP_OPT(nir, nir_opt_dce);
LOOP_OPT(nir, nir_opt_cse);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 0,
.discard_ok = true,
};
OPT(nir, nir_opt_peephole_select, &peephole_select_options);
OPT(nir, nir_opt_intrinsics);
OPT(nir, nir_opt_idiv_const, 32);
OPT(nir, nir_opt_algebraic);
OPT(nir, nir_lower_constant_convert_alu_types);
OPT(nir, nir_opt_constant_folding);
LOOP_OPT_NOT_IDEMPOTENT(nir, nir_opt_peephole_select,
&peephole_select_options);
LOOP_OPT(nir, nir_opt_intrinsics);
LOOP_OPT(nir, nir_opt_idiv_const, 32);
LOOP_OPT_NOT_IDEMPOTENT(nir, nir_opt_algebraic);
LOOP_OPT(nir, nir_lower_constant_convert_alu_types);
LOOP_OPT(nir, nir_opt_constant_folding);
if (lower_flrp != 0) {
OPT(nir, nir_lower_flrp, lower_flrp, false /* always_precise */);
LOOP_OPT(nir, nir_lower_flrp, lower_flrp, false /* always_precise */);
/* Nothing should rematerialize any flrps */
lower_flrp = 0;
}
OPT(nir, nir_opt_dead_cf);
if (OPT(nir, nir_opt_loop)) {
LOOP_OPT(nir, nir_opt_dead_cf);
if (LOOP_OPT_NOT_IDEMPOTENT(nir, nir_opt_loop)) {
/* If nir_opt_loop makes progress, then we need to clean things up
* if we want any hope of nir_opt_if or nir_opt_loop_unroll to make
* progress.
*/
OPT(nir, nir_opt_copy_prop);
OPT(nir, nir_opt_dce);
LOOP_OPT(nir, nir_opt_copy_prop);
LOOP_OPT(nir, nir_opt_dce);
}
OPT(nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
OPT(nir, nir_opt_phi_to_bool);
LOOP_OPT_NOT_IDEMPOTENT(nir, nir_opt_if,
nir_opt_if_optimize_phi_true_false);
LOOP_OPT(nir, nir_opt_phi_to_bool);
if (nir->options->max_unroll_iterations != 0) {
OPT(nir, nir_opt_loop_unroll);
LOOP_OPT_NOT_IDEMPOTENT(nir, nir_opt_loop_unroll);
}
OPT(nir, nir_opt_remove_phis);
OPT(nir, nir_opt_gcm, false);
OPT(nir, nir_opt_undef);
LOOP_OPT(nir, nir_opt_remove_phis);
LOOP_OPT_NOT_IDEMPOTENT(nir, nir_opt_gcm, false);
LOOP_OPT(nir, nir_opt_undef);
} while (progress);
OPT(nir, nir_lower_undef_to_zero);
OPT(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
_mesa_set_destroy(skip, NULL);
}
static unsigned