Merge branch 'intel-nir-passes' into 'main'

Draft: brw: do less stuff in brw_nir_optimize

See merge request mesa/mesa!38413
Alyssa Rosenzweig 2025-12-19 19:47:14 -05:00
commit bc9c06012a


@@ -1377,27 +1377,6 @@ brw_nir_optimize(nir_shader *nir,
       if (nir->info.stage != MESA_SHADER_KERNEL)
          LOOP_OPT(nir_split_array_vars, nir_var_function_temp);
       LOOP_OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
-      LOOP_OPT(nir_opt_deref);
-      if (LOOP_OPT(nir_opt_memcpy))
-         LOOP_OPT(nir_split_var_copies);
-      LOOP_OPT(nir_lower_vars_to_ssa);
-      if (!nir->info.var_copies_lowered) {
-         /* Only run this pass if nir_lower_var_copies was not called
-          * yet. That would lower away any copy_deref instructions and we
-          * don't want to introduce any more.
-          */
-         LOOP_OPT(nir_opt_find_array_copies);
-      }
-      LOOP_OPT(nir_opt_copy_prop_vars);
-      LOOP_OPT(nir_opt_dead_write_vars);
-      LOOP_OPT(nir_opt_combine_stores, nir_var_all);
-      LOOP_OPT(nir_opt_ray_queries);
-      LOOP_OPT(nir_opt_ray_query_ranges);
-      LOOP_OPT(nir_lower_alu_to_scalar, NULL, NULL);
-      LOOP_OPT(nir_opt_copy_prop);
       LOOP_OPT(nir_lower_phis_to_scalar, NULL, NULL);
@@ -1406,39 +1385,21 @@ brw_nir_optimize(nir_shader *nir,
       LOOP_OPT(nir_opt_cse);
       LOOP_OPT(nir_opt_combine_stores, nir_var_all);
-      /* Passing 0 to the peephole select pass causes it to convert
-       * if-statements that contain only move instructions in the branches
-       * regardless of the count.
-       *
-       * Passing 1 to the peephole select pass causes it to convert
-       * if-statements that contain at most a single ALU instruction (total)
-       * in both branches. Before Gfx6, some math instructions were
-       * prohibitively expensive and the results of compare operations need an
-       * extra resolve step. For these reasons, this pass is more harmful
-       * than good on those platforms.
-       *
-       * For indirect loads of uniforms (push constants), we assume that array
+      /* For indirect loads of uniforms (push constants), we assume that array
        * indices will nearly always be in bounds and the cost of the load is
        * low. Therefore there shouldn't be a performance benefit to avoid it.
        */
       nir_opt_peephole_select_options peephole_select_options = {
-         .limit = 0,
+         .limit = 8,
          .indirect_load_ok = true,
+         .expensive_alu_ok = true,
+         .discard_ok = true,
       };
       LOOP_OPT(nir_opt_peephole_select, &peephole_select_options);
-      peephole_select_options.limit = 8;
-      peephole_select_options.expensive_alu_ok = true;
-      LOOP_OPT(nir_opt_peephole_select, &peephole_select_options);
       LOOP_OPT(nir_opt_intrinsics);
-      LOOP_OPT(nir_opt_idiv_const, 32);
       LOOP_OPT_NOT_IDEMPOTENT(nir_opt_algebraic);
-      LOOP_OPT(nir_opt_generate_bfi);
-      LOOP_OPT(nir_opt_reassociate_bfi);
-      LOOP_OPT(nir_lower_constant_convert_alu_types);
       LOOP_OPT(nir_opt_constant_folding);
       LOOP_OPT(nir_opt_dead_cf);
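
For context on the peephole-select change above: nir_opt_peephole_select flattens small if/else blocks into select (bcsel) instructions, with .limit bounding how many ALU instructions the branches may contain and .expensive_alu_ok/.discard_ok widening what may be speculated. A rough C analogy of the rewrite (hypothetical illustration, not Mesa code):

    float branchy(int cond, float a, float b)
    {
       float r;
       if (cond)               /* small branch body, under the .limit threshold */
          r = a * b + 1.0f;
       else
          r = b;
       return r;
    }

    float flattened(int cond, float a, float b)
    {
       /* both sides are evaluated; the branch becomes one select */
       return cond ? a * b + 1.0f : b;
    }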
@@ -1452,24 +1413,13 @@ brw_nir_optimize(nir_shader *nir,
       }
       LOOP_OPT_NOT_IDEMPOTENT(nir_opt_if, nir_opt_if_optimize_phi_true_false);
-      nir_opt_peephole_select_options peephole_discard_options = {
-         .limit = 0,
-         .discard_ok = true,
-      };
-      LOOP_OPT(nir_opt_peephole_select, &peephole_discard_options);
       if (nir->options->max_unroll_iterations != 0) {
          LOOP_OPT_NOT_IDEMPOTENT(nir_opt_loop_unroll);
       }
       LOOP_OPT(nir_opt_remove_phis);
       LOOP_OPT(nir_opt_gcm, false);
       LOOP_OPT(nir_opt_undef);
-      LOOP_OPT(nir_lower_pack);
    } while (progress);
-
-   /* Workaround Gfxbench unused local sampler variable which will trigger an
-    * assert in the opt_large_constants pass.
-    */
-   OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
 }
 
 static unsigned
@@ -1681,12 +1631,34 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
    OPT(nir_normalize_cubemap_coords);
    OPT(nir_lower_global_vars_to_local);
+   OPT(nir_lower_pack);
+   OPT(nir_lower_constant_convert_alu_types);
    OPT(nir_split_var_copies);
    OPT(nir_split_struct_vars, nir_var_function_temp);
+   if (OPT(nir_opt_memcpy))
+      OPT(nir_split_var_copies);
+   OPT(nir_lower_vars_to_ssa);
+
+   /* Run this pass before nir_lower_var_copies: it introduces copy_derefs. */
+   OPT(nir_opt_find_array_copies);
 
    brw_nir_optimize(nir, devinfo);
 
+   if (nir->info.ray_queries) {
+      OPT(nir_opt_ray_queries);
+      OPT(nir_opt_ray_query_ranges);
+   }
+
+   OPT(nir_opt_deref);
+   OPT(nir_opt_copy_prop_vars);
+   OPT(nir_opt_dead_write_vars);
+   OPT(nir_lower_vars_to_ssa);
+   OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
+
    unsigned lower_flrp =
       (nir->options->lower_flrp16 ? 16 : 0) |
       (nir->options->lower_flrp32 ? 32 : 0) |
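
The hunk above also moves nir_opt_find_array_copies ahead of nir_lower_var_copies, since the pass introduces copy_deref instructions and must run while they are still legal. What it detects is an element-wise array copy that can be treated as one whole-array copy; a hypothetical C sketch of the idea:

    #include <string.h>

    void elementwise(float dst[16], const float src[16])
    {
       for (int i = 0; i < 16; i++)
          dst[i] = src[i];                    /* 16 separate load/store pairs */
    }

    void as_one_copy(float dst[16], const float src[16])
    {
       memcpy(dst, src, 16 * sizeof(float));  /* recognized as one array copy */
    }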
@@ -1740,7 +1712,8 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
    nir_variable_mode indirect_mask =
       brw_nir_no_indirect_mask(compiler, nir->info.stage);
-   OPT(nir_lower_indirect_derefs_to_if_else_trees, indirect_mask, UINT32_MAX);
+   if (OPT(nir_lower_indirect_derefs_to_if_else_trees, indirect_mask, UINT32_MAX))
+      OPT(nir_lower_vars_to_ssa);
 
    /* Even in cases where we can handle indirect temporaries via scratch, we
     * it can still be expensive. Lower indirects on small arrays to
@@ -1755,8 +1728,10 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
    * issues are helped but nothing else in shader-db is hurt except for maybe
    * that one kerbal space program shader.
    */
-   if (!(indirect_mask & nir_var_function_temp))
-      OPT(nir_lower_indirect_derefs_to_if_else_trees, nir_var_function_temp, 16);
+   if (!(indirect_mask & nir_var_function_temp)) {
+      if (OPT(nir_lower_indirect_derefs_to_if_else_trees, nir_var_function_temp, 16))
+         OPT(nir_lower_vars_to_ssa);
+   }
 
    /* Lower array derefs of vectors for SSBO and UBO loads. For both UBOs and
    * SSBOs, our back-end is capable of loading an entire vec4 at a time and
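
For reference, nir_lower_indirect_derefs_to_if_else_trees (used in the two hunks above) replaces a dynamically indexed access to a small array with a binary tree of compares and direct accesses, which the newly added nir_lower_vars_to_ssa call can then clean up. A hypothetical C sketch for a 4-element array:

    float indirect(const float arr[4], int i)
    {
       return arr[i];                         /* dynamic index */
    }

    float if_else_tree(const float arr[4], int i)
    {
       /* every leaf is a direct, constant-index access */
       if (i < 2)
          return (i == 0) ? arr[0] : arr[1];
       else
          return (i == 2) ? arr[2] : arr[3];
    }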
@@ -1765,9 +1740,12 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
    * optimizer to combine UBO and SSBO load operations and save us some send
    * messages.
    */
-   OPT(nir_lower_array_deref_of_vec,
-       nir_var_mem_ubo | nir_var_mem_ssbo, NULL,
-       nir_lower_direct_array_deref_of_vec_load);
+   if (OPT(nir_lower_array_deref_of_vec,
+           nir_var_mem_ubo | nir_var_mem_ssbo, NULL,
+           nir_lower_direct_array_deref_of_vec_load)) {
+      OPT(nir_opt_copy_prop_vars);
+   }
 
    /* Clamp load_per_vertex_input of the TCS stage so that we do not generate
    * loads reading out of bounds. We can do this here because we called
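
Returning to the nir_lower_array_deref_of_vec change above: lowering a direct component load into a whole-vector load plus a component pick gives later passes whole-vec4 loads that they can merge into fewer send messages, and the added nir_opt_copy_prop_vars call cleans up afterwards. Roughly, in C terms (hypothetical illustration):

    #include <string.h>

    float component_only(const float *ubo_vec4)
    {
       return ubo_vec4[2];            /* narrow load of one 32-bit component */
    }

    float whole_vector(const float *ubo_vec4)
    {
       float v[4];
       memcpy(v, ubo_vec4, sizeof v); /* load the entire vec4 at once ... */
       return v[2];                   /* ... then pick component 2 */
    }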
@@ -1934,14 +1912,18 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
    NIR_PASS(_, producer, nir_lower_io_vars_to_scalar, nir_var_shader_out);
    NIR_PASS(_, consumer, nir_lower_io_vars_to_scalar, nir_var_shader_in);
 
+   NIR_PASS(_, producer, nir_opt_copy_prop_vars);
+   NIR_PASS(_, consumer, nir_opt_copy_prop_vars);
+
    brw_nir_optimize(producer, devinfo);
    brw_nir_optimize(consumer, devinfo);
 
    if (nir_link_opt_varyings(producer, consumer))
       brw_nir_optimize(consumer, devinfo);
 
-   NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
-   NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
+   NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out |
+            nir_var_function_temp, NULL);
+   NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in |
+            nir_var_function_temp, NULL);
 
    if (nir_remove_unused_varyings(producer, consumer)) {
       if (should_print_nir(producer)) {
@@ -1955,6 +1937,8 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
       NIR_PASS(_, producer, nir_lower_global_vars_to_local);
       NIR_PASS(_, consumer, nir_lower_global_vars_to_local);
 
+      NIR_PASS(_, producer, nir_opt_copy_prop_vars);
+      NIR_PASS(_, consumer, nir_opt_copy_prop_vars);
+
       brw_nir_optimize(producer, devinfo);
       brw_nir_optimize(consumer, devinfo);
@@ -1992,6 +1976,7 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
       NIR_PASS(_, producer, nir_lower_global_vars_to_local);
       NIR_PASS(_, producer, nir_split_var_copies);
       NIR_PASS(_, producer, nir_lower_var_copies);
+      NIR_PASS(_, producer, nir_lower_vars_to_ssa);
    }
 
    if (producer->info.stage == MESA_SHADER_TASK &&
@@ -2258,7 +2243,7 @@ brw_vectorize_lower_mem_access(nir_shader *nir,
                                const struct brw_compiler *compiler,
                                enum brw_robustness_flags robust_flags)
 {
-   bool progress = false;
+   UNUSED bool progress = false;
 
    nir_load_store_vectorize_options options = {
       .modes = nir_var_mem_ubo | nir_var_mem_ssbo |
@@ -2321,17 +2306,11 @@ brw_vectorize_lower_mem_access(nir_shader *nir,
       .cb_data = &cb_data,
    };
    OPT(nir_lower_mem_access_bit_sizes, &mem_access_options);
-   while (progress) {
-      progress = false;
-      OPT(nir_lower_pack);
-      OPT(nir_opt_copy_prop);
-      OPT(nir_opt_dce);
-      OPT(nir_opt_cse);
-      OPT(nir_opt_algebraic);
-      OPT(nir_opt_constant_folding);
-   }
+   OPT(nir_lower_pack);
+   OPT(nir_opt_copy_prop);
+   OPT(nir_opt_dce);
+   OPT(nir_opt_algebraic);
+   OPT(nir_opt_cse);
 
    /* Do this after the vectorization & brw_nir_rebase_const_offset_ubo_loads
    * so that we maximize the offset put into the messages.
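
For reference, the vectorizer that this cleanup follows merges adjacent narrow memory accesses into fewer, wider ones, which is also why maximizing the constant offsets folded into the messages matters. Roughly, in C terms (hypothetical illustration):

    #include <string.h>

    float two_loads(const float *ssbo)
    {
       return ssbo[0] + ssbo[1];      /* two 32-bit loads, two messages */
    }

    float one_load(const float *ssbo)
    {
       float v[2];
       memcpy(v, ssbo, sizeof v);     /* one 64-bit (vec2) load */
       return v[0] + v[1];
    }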
@@ -2565,6 +2544,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
    OPT(brw_nir_lower_texture);
    OPT(nir_lower_bit_size, lower_bit_size_callback, (void *)compiler);
+   OPT(nir_lower_alu_to_scalar, NULL, NULL);
 
    OPT(nir_opt_combine_barriers, combine_all_memory_barriers, NULL);
@@ -2573,9 +2553,10 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
       OPT(nir_opt_algebraic_before_ffma);
    } while (progress);
 
+   OPT(nir_opt_idiv_const, 32);
+
    if (devinfo->verx10 >= 125) {
       /* Lower integer division by constants before nir_lower_idiv. */
-      OPT(nir_opt_idiv_const, 32);
       const nir_lower_idiv_options options = {
          .allow_fp16 = false
       };
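
nir_opt_idiv_const, now run once here rather than inside the optimization loop, strength-reduces division by a constant into a multiply-high and shift so the generic nir_lower_idiv path never sees it. The classic trick, sketched in C (hypothetical illustration):

    #include <stdint.h>

    int32_t div5(int32_t x)
    {
       /* x / 5 via multiply-high: 0x66666667 is roughly 2^33 / 5 */
       int32_t q = (int32_t)(((int64_t)x * 0x66666667LL) >> 33);
       return q - (x >> 31);   /* fixup so negative x truncates toward zero */
    }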
@@ -2641,6 +2622,8 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
    OPT(nir_opt_shrink_vectors, false);
    OPT(intel_nir_opt_peephole_imul32x16);
+   OPT(nir_opt_generate_bfi);
+   OPT(nir_opt_reassociate_bfi);
 
    if (OPT(nir_opt_comparison_pre)) {
       OPT(nir_opt_copy_prop);
@@ -2653,33 +2636,22 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
        * might be under the threshold of conversion to bcsel.
        */
       nir_opt_peephole_select_options peephole_select_options = {
-         .limit = 0,
+         .limit = 1,
+         .expensive_alu_ok = true,
       };
       OPT(nir_opt_peephole_select, &peephole_select_options);
-      peephole_select_options.limit = 1;
-      peephole_select_options.expensive_alu_ok = true;
-      OPT(nir_opt_peephole_select, &peephole_select_options);
    }
 
-   do {
-      progress = false;
-      OPT(brw_nir_opt_fsat);
-      OPT(nir_opt_algebraic_late);
-      OPT(brw_nir_lower_fsign);
-
-      if (progress) {
-         OPT(nir_opt_constant_folding);
-         OPT(nir_opt_copy_prop);
-         OPT(nir_opt_dce);
-         OPT(nir_opt_cse);
-      }
-   } while (progress);
+   OPT(brw_nir_lower_fsign);
+   OPT(brw_nir_opt_fsat);
+
+   while (OPT(nir_opt_algebraic_late)) {
+      OPT(nir_opt_copy_prop);
+      OPT(nir_opt_dce);
+      OPT(nir_opt_cse);
+   }
 
    OPT(nir_lower_fp16_casts, nir_lower_fp16_split_fp64);
    OPT(nir_lower_alu_to_scalar, NULL, NULL);
 
    while (OPT(nir_opt_algebraic_distribute_src_mods)) {
@@ -2816,10 +2788,7 @@ brw_postprocess_nir_out_of_ssa(nir_shader *nir,
    }
 
    OPT(nir_convert_from_ssa, true, true);
 
-   OPT(nir_opt_rematerialize_compares);
-   OPT(nir_opt_dce);
+   if (OPT(nir_opt_rematerialize_compares))
+      OPT(nir_opt_dce);
 
    nir_trivialize_registers(nir);