mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-07 15:10:12 +01:00
intel/brw: track last successful pass and leave the loop early
This is similar to what RADV implements using the NIR_LOOP_PASS
helpers. I have not used those helpers for a couple of reasons:
1. They use the pointer to the optimization function, which doesn't
work if the same function is called multiple times in one invocation
of the loop (fixable)
2. After fixing them, due to Intel's use of sub-expressions, the amount
of code added to wrap the shared macro becomes more than simply
reimplementing them for the Intel compiler
On most workloads the results are a wash, but on compile heavy
workloads like Cyberpunk 2077 and Rise of the Tomb Raider, I saw
fossil-db runtimes fall by 1-2% on my ICL, with no changes to the
compiled shaders. Caio saw closer to 2.5% on TGL.
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27510>
This commit is contained in:
parent
b2ee98d2db
commit
75ede9d9bc
1 changed files with 64 additions and 40 deletions
|
|
@ -688,6 +688,29 @@ brw_nir_lower_fs_outputs(nir_shader *nir)
|
|||
this_progress; \
|
||||
})
|
||||
|
||||
#define LOOP_OPT(pass, ...) ({ \
|
||||
const unsigned long this_line = __LINE__; \
|
||||
bool this_progress = false; \
|
||||
if (opt_line == this_line) \
|
||||
break; \
|
||||
NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
|
||||
if (this_progress) { \
|
||||
progress = true; \
|
||||
opt_line = this_line; \
|
||||
} \
|
||||
this_progress; \
|
||||
})
|
||||
|
||||
#define LOOP_OPT_NOT_IDEMPOTENT(pass, ...) ({ \
|
||||
bool this_progress = false; \
|
||||
NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
|
||||
if (this_progress) { \
|
||||
progress = true; \
|
||||
opt_line = 0; \
|
||||
} \
|
||||
this_progress; \
|
||||
})
|
||||
|
||||
void
|
||||
brw_nir_optimize(nir_shader *nir,
|
||||
const struct intel_device_info *devinfo)
|
||||
|
|
@ -698,6 +721,7 @@ brw_nir_optimize(nir_shader *nir,
|
|||
(nir->options->lower_flrp32 ? 32 : 0) |
|
||||
(nir->options->lower_flrp64 ? 64 : 0);
|
||||
|
||||
unsigned long opt_line = 0;
|
||||
do {
|
||||
progress = false;
|
||||
/* This pass is causing problems with types used by OpenCL :
|
||||
|
|
@ -707,36 +731,36 @@ brw_nir_optimize(nir_shader *nir,
|
|||
* code.
|
||||
*/
|
||||
if (nir->info.stage != MESA_SHADER_KERNEL)
|
||||
OPT(nir_split_array_vars, nir_var_function_temp);
|
||||
OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
|
||||
OPT(nir_opt_deref);
|
||||
if (OPT(nir_opt_memcpy))
|
||||
OPT(nir_split_var_copies);
|
||||
OPT(nir_lower_vars_to_ssa);
|
||||
LOOP_OPT(nir_split_array_vars, nir_var_function_temp);
|
||||
LOOP_OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
|
||||
LOOP_OPT(nir_opt_deref);
|
||||
if (LOOP_OPT(nir_opt_memcpy))
|
||||
LOOP_OPT(nir_split_var_copies);
|
||||
LOOP_OPT(nir_lower_vars_to_ssa);
|
||||
if (!nir->info.var_copies_lowered) {
|
||||
/* Only run this pass if nir_lower_var_copies was not called
|
||||
* yet. That would lower away any copy_deref instructions and we
|
||||
* don't want to introduce any more.
|
||||
*/
|
||||
OPT(nir_opt_find_array_copies);
|
||||
LOOP_OPT(nir_opt_find_array_copies);
|
||||
}
|
||||
OPT(nir_opt_copy_prop_vars);
|
||||
OPT(nir_opt_dead_write_vars);
|
||||
OPT(nir_opt_combine_stores, nir_var_all);
|
||||
LOOP_OPT(nir_opt_copy_prop_vars);
|
||||
LOOP_OPT(nir_opt_dead_write_vars);
|
||||
LOOP_OPT(nir_opt_combine_stores, nir_var_all);
|
||||
|
||||
OPT(nir_opt_ray_queries);
|
||||
OPT(nir_opt_ray_query_ranges);
|
||||
LOOP_OPT(nir_opt_ray_queries);
|
||||
LOOP_OPT(nir_opt_ray_query_ranges);
|
||||
|
||||
OPT(nir_lower_alu_to_scalar, NULL, NULL);
|
||||
LOOP_OPT(nir_lower_alu_to_scalar, NULL, NULL);
|
||||
|
||||
OPT(nir_copy_prop);
|
||||
LOOP_OPT(nir_copy_prop);
|
||||
|
||||
OPT(nir_lower_phis_to_scalar, false);
|
||||
LOOP_OPT(nir_lower_phis_to_scalar, false);
|
||||
|
||||
OPT(nir_copy_prop);
|
||||
OPT(nir_opt_dce);
|
||||
OPT(nir_opt_cse);
|
||||
OPT(nir_opt_combine_stores, nir_var_all);
|
||||
LOOP_OPT(nir_copy_prop);
|
||||
LOOP_OPT(nir_opt_dce);
|
||||
LOOP_OPT(nir_opt_cse);
|
||||
LOOP_OPT(nir_opt_combine_stores, nir_var_all);
|
||||
|
||||
/* Passing 0 to the peephole select pass causes it to convert
|
||||
* if-statements that contain only move instructions in the branches
|
||||
|
|
@ -753,23 +777,23 @@ brw_nir_optimize(nir_shader *nir,
|
|||
* indices will nearly always be in bounds and the cost of the load is
|
||||
* low. Therefore there shouldn't be a performance benefit to avoid it.
|
||||
*/
|
||||
OPT(nir_opt_peephole_select, 0, true, false);
|
||||
OPT(nir_opt_peephole_select, 8, true, true);
|
||||
LOOP_OPT(nir_opt_peephole_select, 0, true, false);
|
||||
LOOP_OPT(nir_opt_peephole_select, 8, true, true);
|
||||
|
||||
OPT(nir_opt_intrinsics);
|
||||
OPT(nir_opt_idiv_const, 32);
|
||||
OPT(nir_opt_algebraic);
|
||||
LOOP_OPT(nir_opt_intrinsics);
|
||||
LOOP_OPT(nir_opt_idiv_const, 32);
|
||||
LOOP_OPT_NOT_IDEMPOTENT(nir_opt_algebraic);
|
||||
|
||||
OPT(nir_opt_reassociate_bfi);
|
||||
LOOP_OPT(nir_opt_reassociate_bfi);
|
||||
|
||||
OPT(nir_lower_constant_convert_alu_types);
|
||||
OPT(nir_opt_constant_folding);
|
||||
LOOP_OPT(nir_lower_constant_convert_alu_types);
|
||||
LOOP_OPT(nir_opt_constant_folding);
|
||||
|
||||
if (lower_flrp != 0) {
|
||||
if (OPT(nir_lower_flrp,
|
||||
if (LOOP_OPT(nir_lower_flrp,
|
||||
lower_flrp,
|
||||
false /* always_precise */)) {
|
||||
OPT(nir_opt_constant_folding);
|
||||
LOOP_OPT(nir_opt_constant_folding);
|
||||
}
|
||||
|
||||
/* Nothing should rematerialize any flrps, so we only need to do this
|
||||
|
|
@ -778,24 +802,24 @@ brw_nir_optimize(nir_shader *nir,
|
|||
lower_flrp = 0;
|
||||
}
|
||||
|
||||
OPT(nir_opt_dead_cf);
|
||||
if (OPT(nir_opt_loop)) {
|
||||
LOOP_OPT(nir_opt_dead_cf);
|
||||
if (LOOP_OPT(nir_opt_loop)) {
|
||||
/* If nir_opt_loop makes progress, then we need to clean
|
||||
* things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
|
||||
* to make progress.
|
||||
*/
|
||||
OPT(nir_copy_prop);
|
||||
OPT(nir_opt_dce);
|
||||
LOOP_OPT(nir_copy_prop);
|
||||
LOOP_OPT(nir_opt_dce);
|
||||
}
|
||||
OPT(nir_opt_if, nir_opt_if_optimize_phi_true_false);
|
||||
OPT(nir_opt_conditional_discard);
|
||||
LOOP_OPT_NOT_IDEMPOTENT(nir_opt_if, nir_opt_if_optimize_phi_true_false);
|
||||
LOOP_OPT(nir_opt_conditional_discard);
|
||||
if (nir->options->max_unroll_iterations != 0) {
|
||||
OPT(nir_opt_loop_unroll);
|
||||
LOOP_OPT_NOT_IDEMPOTENT(nir_opt_loop_unroll);
|
||||
}
|
||||
OPT(nir_opt_remove_phis);
|
||||
OPT(nir_opt_gcm, false);
|
||||
OPT(nir_opt_undef);
|
||||
OPT(nir_lower_pack);
|
||||
LOOP_OPT(nir_opt_remove_phis);
|
||||
LOOP_OPT(nir_opt_gcm, false);
|
||||
LOOP_OPT(nir_opt_undef);
|
||||
LOOP_OPT(nir_lower_pack);
|
||||
} while (progress);
|
||||
|
||||
/* Workaround Gfxbench unused local sampler variable which will trigger an
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue