mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 05:10:11 +01:00
Merge branch 'intel-nir-passes' into 'main'
Draft: brw: do less stuff in brw_nir_optimize See merge request mesa/mesa!38413
This commit is contained in:
commit
bc9c06012a
1 changed files with 68 additions and 99 deletions
|
|
@ -1377,27 +1377,6 @@ brw_nir_optimize(nir_shader *nir,
|
|||
if (nir->info.stage != MESA_SHADER_KERNEL)
|
||||
LOOP_OPT(nir_split_array_vars, nir_var_function_temp);
|
||||
LOOP_OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
|
||||
LOOP_OPT(nir_opt_deref);
|
||||
if (LOOP_OPT(nir_opt_memcpy))
|
||||
LOOP_OPT(nir_split_var_copies);
|
||||
LOOP_OPT(nir_lower_vars_to_ssa);
|
||||
if (!nir->info.var_copies_lowered) {
|
||||
/* Only run this pass if nir_lower_var_copies was not called
|
||||
* yet. That would lower away any copy_deref instructions and we
|
||||
* don't want to introduce any more.
|
||||
*/
|
||||
LOOP_OPT(nir_opt_find_array_copies);
|
||||
}
|
||||
LOOP_OPT(nir_opt_copy_prop_vars);
|
||||
LOOP_OPT(nir_opt_dead_write_vars);
|
||||
LOOP_OPT(nir_opt_combine_stores, nir_var_all);
|
||||
|
||||
LOOP_OPT(nir_opt_ray_queries);
|
||||
LOOP_OPT(nir_opt_ray_query_ranges);
|
||||
|
||||
LOOP_OPT(nir_lower_alu_to_scalar, NULL, NULL);
|
||||
|
||||
LOOP_OPT(nir_opt_copy_prop);
|
||||
|
||||
LOOP_OPT(nir_lower_phis_to_scalar, NULL, NULL);
|
||||
|
||||
|
|
@ -1406,39 +1385,21 @@ brw_nir_optimize(nir_shader *nir,
|
|||
LOOP_OPT(nir_opt_cse);
|
||||
LOOP_OPT(nir_opt_combine_stores, nir_var_all);
|
||||
|
||||
/* Passing 0 to the peephole select pass causes it to convert
|
||||
* if-statements that contain only move instructions in the branches
|
||||
* regardless of the count.
|
||||
*
|
||||
* Passing 1 to the peephole select pass causes it to convert
|
||||
* if-statements that contain at most a single ALU instruction (total)
|
||||
* in both branches. Before Gfx6, some math instructions were
|
||||
* prohibitively expensive and the results of compare operations need an
|
||||
* extra resolve step. For these reasons, this pass is more harmful
|
||||
* than good on those platforms.
|
||||
*
|
||||
* For indirect loads of uniforms (push constants), we assume that array
|
||||
/* For indirect loads of uniforms (push constants), we assume that array
|
||||
* indices will nearly always be in bounds and the cost of the load is
|
||||
* low. Therefore there shouldn't be a performance benefit to avoid it.
|
||||
*/
|
||||
nir_opt_peephole_select_options peephole_select_options = {
|
||||
.limit = 0,
|
||||
.limit = 8,
|
||||
.indirect_load_ok = true,
|
||||
.expensive_alu_ok = true,
|
||||
.discard_ok = true,
|
||||
};
|
||||
LOOP_OPT(nir_opt_peephole_select, &peephole_select_options);
|
||||
|
||||
peephole_select_options.limit = 8;
|
||||
peephole_select_options.expensive_alu_ok = true;
|
||||
LOOP_OPT(nir_opt_peephole_select, &peephole_select_options);
|
||||
|
||||
LOOP_OPT(nir_opt_intrinsics);
|
||||
LOOP_OPT(nir_opt_idiv_const, 32);
|
||||
LOOP_OPT_NOT_IDEMPOTENT(nir_opt_algebraic);
|
||||
|
||||
LOOP_OPT(nir_opt_generate_bfi);
|
||||
LOOP_OPT(nir_opt_reassociate_bfi);
|
||||
|
||||
LOOP_OPT(nir_lower_constant_convert_alu_types);
|
||||
LOOP_OPT(nir_opt_constant_folding);
|
||||
|
||||
LOOP_OPT(nir_opt_dead_cf);
|
||||
|
|
@ -1452,24 +1413,13 @@ brw_nir_optimize(nir_shader *nir,
|
|||
}
|
||||
LOOP_OPT_NOT_IDEMPOTENT(nir_opt_if, nir_opt_if_optimize_phi_true_false);
|
||||
|
||||
nir_opt_peephole_select_options peephole_discard_options = {
|
||||
.limit = 0,
|
||||
.discard_ok = true,
|
||||
};
|
||||
LOOP_OPT(nir_opt_peephole_select, &peephole_discard_options);
|
||||
if (nir->options->max_unroll_iterations != 0) {
|
||||
LOOP_OPT_NOT_IDEMPOTENT(nir_opt_loop_unroll);
|
||||
}
|
||||
LOOP_OPT(nir_opt_remove_phis);
|
||||
LOOP_OPT(nir_opt_gcm, false);
|
||||
LOOP_OPT(nir_opt_undef);
|
||||
LOOP_OPT(nir_lower_pack);
|
||||
} while (progress);
|
||||
|
||||
/* Workaround Gfxbench unused local sampler variable which will trigger an
|
||||
* assert in the opt_large_constants pass.
|
||||
*/
|
||||
OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
|
|
@ -1681,12 +1631,34 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
|
|||
OPT(nir_normalize_cubemap_coords);
|
||||
|
||||
OPT(nir_lower_global_vars_to_local);
|
||||
OPT(nir_lower_pack);
|
||||
OPT(nir_lower_constant_convert_alu_types);
|
||||
|
||||
OPT(nir_split_var_copies);
|
||||
OPT(nir_split_struct_vars, nir_var_function_temp);
|
||||
|
||||
if (OPT(nir_opt_memcpy))
|
||||
OPT(nir_split_var_copies);
|
||||
|
||||
OPT(nir_lower_vars_to_ssa);
|
||||
|
||||
/* Run this pass before nir_lower_var_copies: it introduces copy_derefs. */
|
||||
OPT(nir_opt_find_array_copies);
|
||||
|
||||
brw_nir_optimize(nir, devinfo);
|
||||
|
||||
if (nir->info.ray_queries) {
|
||||
OPT(nir_opt_ray_queries);
|
||||
OPT(nir_opt_ray_query_ranges);
|
||||
}
|
||||
|
||||
OPT(nir_opt_deref);
|
||||
OPT(nir_opt_copy_prop_vars);
|
||||
OPT(nir_opt_dead_write_vars);
|
||||
|
||||
OPT(nir_lower_vars_to_ssa);
|
||||
OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
|
||||
|
||||
unsigned lower_flrp =
|
||||
(nir->options->lower_flrp16 ? 16 : 0) |
|
||||
(nir->options->lower_flrp32 ? 32 : 0) |
|
||||
|
|
@ -1740,7 +1712,8 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
|
|||
|
||||
nir_variable_mode indirect_mask =
|
||||
brw_nir_no_indirect_mask(compiler, nir->info.stage);
|
||||
OPT(nir_lower_indirect_derefs_to_if_else_trees, indirect_mask, UINT32_MAX);
|
||||
if (OPT(nir_lower_indirect_derefs_to_if_else_trees, indirect_mask, UINT32_MAX))
|
||||
OPT(nir_lower_vars_to_ssa);
|
||||
|
||||
/* Even in cases where we can handle indirect temporaries via scratch,
|
||||
* it can still be expensive. Lower indirects on small arrays to
|
||||
|
|
@ -1755,8 +1728,10 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
|
|||
* issues are helped but nothing else in shader-db is hurt except for maybe
|
||||
* that one kerbal space program shader.
|
||||
*/
|
||||
if (!(indirect_mask & nir_var_function_temp))
|
||||
OPT(nir_lower_indirect_derefs_to_if_else_trees, nir_var_function_temp, 16);
|
||||
if (!(indirect_mask & nir_var_function_temp)) {
|
||||
if (OPT(nir_lower_indirect_derefs_to_if_else_trees, nir_var_function_temp, 16))
|
||||
OPT(nir_lower_vars_to_ssa);
|
||||
}
|
||||
|
||||
/* Lower array derefs of vectors for SSBO and UBO loads. For both UBOs and
|
||||
* SSBOs, our back-end is capable of loading an entire vec4 at a time and
|
||||
|
|
@ -1765,9 +1740,12 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
|
|||
* optimizer to combine UBO and SSBO load operations and save us some send
|
||||
* messages.
|
||||
*/
|
||||
OPT(nir_lower_array_deref_of_vec,
|
||||
if (OPT(nir_lower_array_deref_of_vec,
|
||||
nir_var_mem_ubo | nir_var_mem_ssbo, NULL,
|
||||
nir_lower_direct_array_deref_of_vec_load);
|
||||
nir_lower_direct_array_deref_of_vec_load)) {
|
||||
|
||||
OPT(nir_opt_copy_prop_vars);
|
||||
}
|
||||
|
||||
/* Clamp load_per_vertex_input of the TCS stage so that we do not generate
|
||||
* loads reading out of bounds. We can do this here because we called
|
||||
|
|
@ -1934,14 +1912,18 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
|
|||
|
||||
NIR_PASS(_, producer, nir_lower_io_vars_to_scalar, nir_var_shader_out);
|
||||
NIR_PASS(_, consumer, nir_lower_io_vars_to_scalar, nir_var_shader_in);
|
||||
NIR_PASS(_, producer, nir_opt_copy_prop_vars);
|
||||
NIR_PASS(_, consumer, nir_opt_copy_prop_vars);
|
||||
brw_nir_optimize(producer, devinfo);
|
||||
brw_nir_optimize(consumer, devinfo);
|
||||
|
||||
if (nir_link_opt_varyings(producer, consumer))
|
||||
brw_nir_optimize(consumer, devinfo);
|
||||
|
||||
NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
|
||||
NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
|
||||
NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out |
|
||||
nir_var_function_temp, NULL);
|
||||
NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in |
|
||||
nir_var_function_temp, NULL);
|
||||
|
||||
if (nir_remove_unused_varyings(producer, consumer)) {
|
||||
if (should_print_nir(producer)) {
|
||||
|
|
@ -1955,6 +1937,8 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
|
|||
|
||||
NIR_PASS(_, producer, nir_lower_global_vars_to_local);
|
||||
NIR_PASS(_, consumer, nir_lower_global_vars_to_local);
|
||||
NIR_PASS(_, producer, nir_opt_copy_prop_vars);
|
||||
NIR_PASS(_, consumer, nir_opt_copy_prop_vars);
|
||||
|
||||
brw_nir_optimize(producer, devinfo);
|
||||
brw_nir_optimize(consumer, devinfo);
|
||||
|
|
@ -1992,6 +1976,7 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
|
|||
NIR_PASS(_, producer, nir_lower_global_vars_to_local);
|
||||
NIR_PASS(_, producer, nir_split_var_copies);
|
||||
NIR_PASS(_, producer, nir_lower_var_copies);
|
||||
NIR_PASS(_, producer, nir_lower_vars_to_ssa);
|
||||
}
|
||||
|
||||
if (producer->info.stage == MESA_SHADER_TASK &&
|
||||
|
|
@ -2258,7 +2243,7 @@ brw_vectorize_lower_mem_access(nir_shader *nir,
|
|||
const struct brw_compiler *compiler,
|
||||
enum brw_robustness_flags robust_flags)
|
||||
{
|
||||
bool progress = false;
|
||||
UNUSED bool progress = false;
|
||||
|
||||
nir_load_store_vectorize_options options = {
|
||||
.modes = nir_var_mem_ubo | nir_var_mem_ssbo |
|
||||
|
|
@ -2321,17 +2306,11 @@ brw_vectorize_lower_mem_access(nir_shader *nir,
|
|||
.cb_data = &cb_data,
|
||||
};
|
||||
OPT(nir_lower_mem_access_bit_sizes, &mem_access_options);
|
||||
|
||||
while (progress) {
|
||||
progress = false;
|
||||
|
||||
OPT(nir_lower_pack);
|
||||
OPT(nir_opt_copy_prop);
|
||||
OPT(nir_opt_dce);
|
||||
OPT(nir_opt_cse);
|
||||
OPT(nir_opt_algebraic);
|
||||
OPT(nir_opt_constant_folding);
|
||||
}
|
||||
OPT(nir_opt_cse);
|
||||
|
||||
/* Do this after the vectorization & brw_nir_rebase_const_offset_ubo_loads
|
||||
* so that we maximize the offset put into the messages.
|
||||
|
|
@ -2565,6 +2544,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
|
|||
OPT(brw_nir_lower_texture);
|
||||
|
||||
OPT(nir_lower_bit_size, lower_bit_size_callback, (void *)compiler);
|
||||
OPT(nir_lower_alu_to_scalar, NULL, NULL);
|
||||
|
||||
OPT(nir_opt_combine_barriers, combine_all_memory_barriers, NULL);
|
||||
|
||||
|
|
@ -2573,9 +2553,10 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
|
|||
OPT(nir_opt_algebraic_before_ffma);
|
||||
} while (progress);
|
||||
|
||||
OPT(nir_opt_idiv_const, 32);
|
||||
|
||||
if (devinfo->verx10 >= 125) {
|
||||
/* Lower integer division by constants before nir_lower_idiv. */
|
||||
OPT(nir_opt_idiv_const, 32);
|
||||
const nir_lower_idiv_options options = {
|
||||
.allow_fp16 = false
|
||||
};
|
||||
|
|
@ -2641,6 +2622,8 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
|
|||
OPT(nir_opt_shrink_vectors, false);
|
||||
|
||||
OPT(intel_nir_opt_peephole_imul32x16);
|
||||
OPT(nir_opt_generate_bfi);
|
||||
OPT(nir_opt_reassociate_bfi);
|
||||
|
||||
if (OPT(nir_opt_comparison_pre)) {
|
||||
OPT(nir_opt_copy_prop);
|
||||
|
|
@ -2653,33 +2636,22 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
|
|||
* might be under the threshold of conversion to bcsel.
|
||||
*/
|
||||
nir_opt_peephole_select_options peephole_select_options = {
|
||||
.limit = 0,
|
||||
.limit = 1,
|
||||
.expensive_alu_ok = true,
|
||||
};
|
||||
OPT(nir_opt_peephole_select, &peephole_select_options);
|
||||
|
||||
peephole_select_options.limit = 1;
|
||||
peephole_select_options.expensive_alu_ok = true;
|
||||
OPT(nir_opt_peephole_select, &peephole_select_options);
|
||||
}
|
||||
|
||||
do {
|
||||
progress = false;
|
||||
|
||||
OPT(brw_nir_opt_fsat);
|
||||
OPT(nir_opt_algebraic_late);
|
||||
OPT(brw_nir_lower_fsign);
|
||||
OPT(brw_nir_opt_fsat);
|
||||
|
||||
if (progress) {
|
||||
OPT(nir_opt_constant_folding);
|
||||
while (OPT(nir_opt_algebraic_late)) {
|
||||
OPT(nir_opt_copy_prop);
|
||||
OPT(nir_opt_dce);
|
||||
OPT(nir_opt_cse);
|
||||
}
|
||||
} while (progress);
|
||||
|
||||
|
||||
OPT(nir_lower_fp16_casts, nir_lower_fp16_split_fp64);
|
||||
|
||||
OPT(nir_lower_alu_to_scalar, NULL, NULL);
|
||||
|
||||
while (OPT(nir_opt_algebraic_distribute_src_mods)) {
|
||||
|
|
@ -2816,10 +2788,7 @@ brw_postprocess_nir_out_of_ssa(nir_shader *nir,
|
|||
}
|
||||
|
||||
OPT(nir_convert_from_ssa, true, true);
|
||||
|
||||
OPT(nir_opt_dce);
|
||||
|
||||
if (OPT(nir_opt_rematerialize_compares))
|
||||
OPT(nir_opt_rematerialize_compares);
|
||||
OPT(nir_opt_dce);
|
||||
|
||||
nir_trivialize_registers(nir);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue