mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-21 09:50:36 +02:00
ir3: Re-fuse ffmas after nir_lower_imul cleanup breaks them.
The nir_opt_algebraic() call that cleans up the multiply operations split
up by nir_lower_imul (e.g. "the top 16 bits were 0, no need to mul and add
that part") also triggers the early ffma splitting controlled by
options->fuse_ffma_*, so nir_opt_algebraic_late() has to be called again
afterwards to re-fuse them (which, in turn, requires a DCE pass).
Gets us a lot more ffmas in Aztec Ruins high under zink/angle, but doesn't
seem to change perf.
shader-db highlights:
total instructions in shared programs: 11574843 -> 10999629 (-4.97%)
instructions in affected programs: 3308870 -> 2733656 (-17.38%)
total dwords in shared programs: 24344722 -> 23230122 (-4.58%)
dwords in affected programs: 6569568 -> 5454968 (-16.97%)
total full in shared programs: 762616 -> 762224 (-0.05%)
full in affected programs: 15505 -> 15113 (-2.53%)
total stp in shared programs: 4046 -> 4050 (0.10%)
stp in affected programs: 3372 -> 3376 (0.12%)
total ldp in shared programs: 2166 -> 2170 (0.18%)
ldp in affected programs: 1716 -> 1720 (0.23%)
total (ss) in shared programs: 219541 -> 216261 (-1.49%)
(ss) in affected programs: 23227 -> 19947 (-14.12%)
total (sy) in shared programs: 101633 -> 101927 (0.29%)
(sy) in affected programs: 8611 -> 8905 (3.41%)
total waves in shared programs: 1501942 -> 1501772 (-0.01%)
waves in affected programs: 1880 -> 1710 (-9.04%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18946>
This commit is contained in:
parent
a39113b616
commit
062d4d83e1
1 changed file with 9 additions and 0 deletions
|
|
@@ -84,6 +84,7 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
|
|||
* in ir3_optimize_nir():
|
||||
*/
|
||||
bool progress = false;
|
||||
bool needs_late_alg = false;
|
||||
NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs);
|
||||
|
||||
/* we could need cleanup after lower_locals_to_regs */
|
||||
|
|
@@ -91,6 +92,7 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
|
|||
progress = false;
|
||||
NIR_PASS(progress, ctx->s, nir_opt_algebraic);
|
||||
NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
|
||||
needs_late_alg = true;
|
||||
}
|
||||
|
||||
/* We want to lower nir_op_imul as late as possible, to catch also
|
||||
|
|
@@ -107,6 +109,13 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
|
|||
NIR_PASS(progress, ctx->s, nir_opt_dead_write_vars);
|
||||
NIR_PASS(progress, ctx->s, nir_opt_dce);
|
||||
NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
|
||||
needs_late_alg = true;
|
||||
}
|
||||
|
||||
/* nir_opt_algebraic() above would have unfused our ffmas, re-fuse them. */
|
||||
if (needs_late_alg) {
|
||||
NIR_PASS(progress, ctx->s, nir_opt_algebraic_late);
|
||||
NIR_PASS(progress, ctx->s, nir_opt_dce);
|
||||
}
|
||||
|
||||
/* Enable the texture pre-fetch feature only a4xx onwards. But
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue