nir/move_vec_src_uses_to_dest: allow skipping reuse of constant sources

Enable this for r300 and intel-vec4; all other callers pass false and keep the previous behavior.

crocus HSW (mostly helps a few Dolphin ubershaders):
total instructions in shared programs: 1576736 -> 1576589 (<.01%)
instructions in affected programs: 38235 -> 38088 (-0.38%)
helped: 12
HURT: 0
total cycles in shared programs: 111025838 -> 110944796 (-0.07%)
cycles in affected programs: 5646582 -> 5565540 (-1.44%)
helped: 15
HURT: 6
total spills in shared programs: 447 -> 432 (-3.36%)
spills in affected programs: 186 -> 171 (-8.06%)
helped: 12
HURT: 0
total fills in shared programs: 792 -> 774 (-2.27%)
fills in affected programs: 291 -> 273 (-6.19%)
helped: 12
HURT: 0

r300 RV530:
total instructions in shared programs: 96655 -> 96304 (-0.36%)
instructions in affected programs: 15020 -> 14669 (-2.34%)
helped: 79
HURT: 18
total temps in shared programs: 13027 -> 12952 (-0.58%)
temps in affected programs: 677 -> 602 (-11.08%)
helped: 41
HURT: 9
total cycles in shared programs: 147745 -> 147314 (-0.29%)
cycles in affected programs: 21831 -> 21400 (-1.97%)
helped: 84
HURT: 19

r300 RV370:
total instructions in shared programs: 63678 -> 63669 (-0.01%)
instructions in affected programs: 931 -> 922 (-0.97%)
helped: 12
HURT: 6
total temps in shared programs: 10028 -> 10013 (-0.15%)
temps in affected programs: 339 -> 324 (-4.42%)
helped: 33
HURT: 10
total cycles in shared programs: 101118 -> 101087 (-0.03%)
cycles in affected programs: 2659 -> 2628 (-1.17%)
helped: 22
HURT: 6

Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24932>
This commit is contained in:
Pavel Ondračka 2023-08-29 08:46:17 +02:00
parent dc60194599
commit 1c72c71bdf
9 changed files with 16 additions and 13 deletions

View file

@ -5293,7 +5293,7 @@ bool nir_zero_initialize_shared_memory(nir_shader *shader,
const unsigned shared_size,
const unsigned chunk_size);
bool nir_move_vec_src_uses_to_dest(nir_shader *shader);
bool nir_move_vec_src_uses_to_dest(nir_shader *shader, bool skip_const_srcs);
bool nir_lower_vec_to_regs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
const void *_data);
void nir_lower_alpha_test(nir_shader *shader, enum compare_func func,

View file

@ -58,7 +58,7 @@ ssa_def_dominates_instr(nir_def *def, nir_instr *instr)
}
static bool
move_vec_src_uses_to_dest_block(nir_block *block)
move_vec_src_uses_to_dest_block(nir_block *block, bool skip_const_srcs)
{
bool progress = false;
@ -95,6 +95,8 @@ move_vec_src_uses_to_dest_block(nir_block *block)
*/
int srcs_remaining = 0;
for (unsigned i = 0; i < nir_op_infos[vec->op].num_inputs; i++) {
if (skip_const_srcs && nir_src_is_const(vec->src[i].src))
continue;
srcs_remaining |= 1 << i;
}
@ -171,7 +173,8 @@ move_vec_src_uses_to_dest_block(nir_block *block)
}
static bool
nir_move_vec_src_uses_to_dest_impl(nir_shader *shader, nir_function_impl *impl)
nir_move_vec_src_uses_to_dest_impl(nir_shader *shader, nir_function_impl *impl,
bool skip_const_srcs)
{
bool progress = false;
@ -180,7 +183,7 @@ nir_move_vec_src_uses_to_dest_impl(nir_shader *shader, nir_function_impl *impl)
nir_index_instrs(impl);
nir_foreach_block(block, impl) {
progress |= move_vec_src_uses_to_dest_block(block);
progress |= move_vec_src_uses_to_dest_block(block, skip_const_srcs);
}
nir_metadata_preserve(impl, nir_metadata_block_index |
@ -190,12 +193,12 @@ nir_move_vec_src_uses_to_dest_impl(nir_shader *shader, nir_function_impl *impl)
}
bool
nir_move_vec_src_uses_to_dest(nir_shader *shader)
nir_move_vec_src_uses_to_dest(nir_shader *shader, bool skip_const_srcs)
{
bool progress = false;
nir_foreach_function_impl(impl, shader) {
progress |= nir_move_vec_src_uses_to_dest_impl(shader, impl);
progress |= nir_move_vec_src_uses_to_dest_impl(shader, impl, skip_const_srcs);
}
return progress;

View file

@ -395,7 +395,7 @@ lp_build_nir_aos(struct gallivm_state *gallivm,
bld.bld_base.emit_var_decl = emit_var_decl;
lp_build_nir_prepasses(shader);
NIR_PASS_V(shader, nir_move_vec_src_uses_to_dest);
NIR_PASS_V(shader, nir_move_vec_src_uses_to_dest, false);
NIR_PASS_V(shader, nir_lower_vec_to_regs, NULL, NULL);
lp_build_nir_llvm(&bld.bld_base, shader,
nir_shader_get_entrypoint(shader));

View file

@ -1211,7 +1211,7 @@ etna_compile_shader(struct etna_shader_variant *v)
NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
NIR_PASS_V(s, nir_opt_algebraic_late);
NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
NIR_PASS_V(s, nir_move_vec_src_uses_to_dest, false);
NIR_PASS_V(s, nir_copy_prop);
/* need copy prop after uses_to_dest, and before src mods: see
* dEQP-GLES2.functional.shaders.random.all_features.fragment.95

View file

@ -1148,7 +1148,7 @@ ir2_nir_compile(struct ir2_context *ctx, bool binning)
OPT_V(ctx->nir, nir_convert_from_ssa, true);
OPT_V(ctx->nir, nir_move_vec_src_uses_to_dest);
OPT_V(ctx->nir, nir_move_vec_src_uses_to_dest, false);
OPT_V(ctx->nir, nir_lower_vec_to_regs, NULL, NULL);
OPT_V(ctx->nir, nir_legacy_trivialize, true);

View file

@ -272,7 +272,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
NIR_PASS_V(s, nir_convert_from_ssa, true);
NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
NIR_PASS_V(s, nir_move_vec_src_uses_to_dest, false);
NIR_PASS_V(s, nir_lower_vec_to_regs, lima_vec_to_regs_filter_cb, NULL);
NIR_PASS_V(s, nir_opt_dce); /* clean up any new dead code from vec to movs */

View file

@ -2451,7 +2451,7 @@ const void *nir_to_rc_options(struct nir_shader *s,
nir_move_comparisons | nir_move_copies | nir_move_load_ssbo;
NIR_PASS_V(s, nir_opt_move, move_all);
NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
NIR_PASS_V(s, nir_move_vec_src_uses_to_dest, true);
NIR_PASS_V(s, nir_convert_from_ssa, true);
NIR_PASS_V(s, nir_lower_vec_to_regs, NULL, NULL);

View file

@ -1732,7 +1732,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
OPT(nir_convert_from_ssa, true);
if (!is_scalar) {
OPT(nir_move_vec_src_uses_to_dest);
OPT(nir_move_vec_src_uses_to_dest, true);
OPT(nir_lower_vec_to_regs, NULL, NULL);
}

View file

@ -477,7 +477,7 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
NIR_PASS(progress, nir, nir_convert_from_ssa, true);
/* We are a vector architecture; write combine where possible */
NIR_PASS(progress, nir, nir_move_vec_src_uses_to_dest);
NIR_PASS(progress, nir, nir_move_vec_src_uses_to_dest, false);
NIR_PASS(progress, nir, nir_lower_vec_to_regs, NULL, NULL);
NIR_PASS(progress, nir, nir_opt_dce);