diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index cb67f538ebd..77f19ec0642 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -688,6 +688,29 @@ brw_nir_lower_fs_outputs(nir_shader *nir) this_progress; \ }) +#define LOOP_OPT(pass, ...) ({ \ + const unsigned long this_line = __LINE__; \ + bool this_progress = false; \ + if (opt_line == this_line) \ + break; \ + NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ + if (this_progress) { \ + progress = true; \ + opt_line = this_line; \ + } \ + this_progress; \ +}) + +#define LOOP_OPT_NOT_IDEMPOTENT(pass, ...) ({ \ + bool this_progress = false; \ + NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ + if (this_progress) { \ + progress = true; \ + opt_line = 0; \ + } \ + this_progress; \ +}) + void brw_nir_optimize(nir_shader *nir, const struct intel_device_info *devinfo) @@ -698,6 +721,7 @@ brw_nir_optimize(nir_shader *nir, (nir->options->lower_flrp32 ? 32 : 0) | (nir->options->lower_flrp64 ? 64 : 0); + unsigned long opt_line = 0; do { progress = false; /* This pass is causing problems with types used by OpenCL : @@ -707,36 +731,36 @@ brw_nir_optimize(nir_shader *nir, * code. */ if (nir->info.stage != MESA_SHADER_KERNEL) - OPT(nir_split_array_vars, nir_var_function_temp); - OPT(nir_shrink_vec_array_vars, nir_var_function_temp); - OPT(nir_opt_deref); - if (OPT(nir_opt_memcpy)) - OPT(nir_split_var_copies); - OPT(nir_lower_vars_to_ssa); + LOOP_OPT(nir_split_array_vars, nir_var_function_temp); + LOOP_OPT(nir_shrink_vec_array_vars, nir_var_function_temp); + LOOP_OPT(nir_opt_deref); + if (LOOP_OPT(nir_opt_memcpy)) + LOOP_OPT(nir_split_var_copies); + LOOP_OPT(nir_lower_vars_to_ssa); if (!nir->info.var_copies_lowered) { /* Only run this pass if nir_lower_var_copies was not called * yet. That would lower away any copy_deref instructions and we * don't want to introduce any more. */ - OPT(nir_opt_find_array_copies); + LOOP_OPT(nir_opt_find_array_copies); } - OPT(nir_opt_copy_prop_vars); - OPT(nir_opt_dead_write_vars); - OPT(nir_opt_combine_stores, nir_var_all); + LOOP_OPT(nir_opt_copy_prop_vars); + LOOP_OPT(nir_opt_dead_write_vars); + LOOP_OPT(nir_opt_combine_stores, nir_var_all); - OPT(nir_opt_ray_queries); - OPT(nir_opt_ray_query_ranges); + LOOP_OPT(nir_opt_ray_queries); + LOOP_OPT(nir_opt_ray_query_ranges); - OPT(nir_lower_alu_to_scalar, NULL, NULL); + LOOP_OPT(nir_lower_alu_to_scalar, NULL, NULL); - OPT(nir_copy_prop); + LOOP_OPT(nir_copy_prop); - OPT(nir_lower_phis_to_scalar, false); + LOOP_OPT(nir_lower_phis_to_scalar, false); - OPT(nir_copy_prop); - OPT(nir_opt_dce); - OPT(nir_opt_cse); - OPT(nir_opt_combine_stores, nir_var_all); + LOOP_OPT(nir_copy_prop); + LOOP_OPT(nir_opt_dce); + LOOP_OPT(nir_opt_cse); + LOOP_OPT(nir_opt_combine_stores, nir_var_all); /* Passing 0 to the peephole select pass causes it to convert * if-statements that contain only move instructions in the branches @@ -753,23 +777,23 @@ brw_nir_optimize(nir_shader *nir, * indices will nearly always be in bounds and the cost of the load is * low. Therefore there shouldn't be a performance benefit to avoid it. */ - OPT(nir_opt_peephole_select, 0, true, false); - OPT(nir_opt_peephole_select, 8, true, true); + LOOP_OPT(nir_opt_peephole_select, 0, true, false); + LOOP_OPT(nir_opt_peephole_select, 8, true, true); - OPT(nir_opt_intrinsics); - OPT(nir_opt_idiv_const, 32); - OPT(nir_opt_algebraic); + LOOP_OPT(nir_opt_intrinsics); + LOOP_OPT(nir_opt_idiv_const, 32); + LOOP_OPT_NOT_IDEMPOTENT(nir_opt_algebraic); - OPT(nir_opt_reassociate_bfi); + LOOP_OPT(nir_opt_reassociate_bfi); - OPT(nir_lower_constant_convert_alu_types); - OPT(nir_opt_constant_folding); + LOOP_OPT(nir_lower_constant_convert_alu_types); + LOOP_OPT(nir_opt_constant_folding); if (lower_flrp != 0) { - if (OPT(nir_lower_flrp, + if (LOOP_OPT(nir_lower_flrp, lower_flrp, false /* always_precise */)) { - OPT(nir_opt_constant_folding); + LOOP_OPT(nir_opt_constant_folding); } /* Nothing should rematerialize any flrps, so we only need to do this @@ -778,24 +802,24 @@ brw_nir_optimize(nir_shader *nir, lower_flrp = 0; } - OPT(nir_opt_dead_cf); - if (OPT(nir_opt_loop)) { + LOOP_OPT(nir_opt_dead_cf); + if (LOOP_OPT(nir_opt_loop)) { /* If nir_opt_loop makes progress, then we need to clean * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll * to make progress. */ - OPT(nir_copy_prop); - OPT(nir_opt_dce); + LOOP_OPT(nir_copy_prop); + LOOP_OPT(nir_opt_dce); } - OPT(nir_opt_if, nir_opt_if_optimize_phi_true_false); - OPT(nir_opt_conditional_discard); + LOOP_OPT_NOT_IDEMPOTENT(nir_opt_if, nir_opt_if_optimize_phi_true_false); + LOOP_OPT(nir_opt_conditional_discard); if (nir->options->max_unroll_iterations != 0) { - OPT(nir_opt_loop_unroll); + LOOP_OPT_NOT_IDEMPOTENT(nir_opt_loop_unroll); } - OPT(nir_opt_remove_phis); - OPT(nir_opt_gcm, false); - OPT(nir_opt_undef); - OPT(nir_lower_pack); + LOOP_OPT(nir_opt_remove_phis); + LOOP_OPT(nir_opt_gcm, false); + LOOP_OPT(nir_opt_undef); + LOOP_OPT(nir_lower_pack); } while (progress); /* Workaround Gfxbench unused local sampler variable which will trigger an