mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 22:08:26 +02:00
nir,amd: reassociate fadd to create more fma/mad
ACO's backend fusing is quite competent, but it cannot reorder adds.
This adds a simple algebraic pass to do that for us.

Foz-DB Navi10:
Totals from 13568 (18.76% of 72319) affected shaders:
MaxWaves: 304722 -> 304004 (-0.24%); split: +0.10%, -0.33%
Instrs: 15084252 -> 14993010 (-0.60%); split: -0.61%, +0.00%
CodeSize: 81480188 -> 81372600 (-0.13%); split: -0.17%, +0.04%
VGPRs: 741580 -> 743680 (+0.28%); split: -0.10%, +0.38%
SpillSGPRs: 9418 -> 9434 (+0.17%)
Latency: 154602014 -> 154312940 (-0.19%); split: -0.29%, +0.10%
InvThroughput: 44628554 -> 44442595 (-0.42%); split: -0.47%, +0.05%
VClause: 300035 -> 300054 (+0.01%); split: -0.31%, +0.31%
SClause: 370992 -> 370640 (-0.09%); split: -0.15%, +0.06%
Copies: 1162401 -> 1162800 (+0.03%); split: -0.30%, +0.33%
Branches: 300646 -> 300654 (+0.00%); split: -0.00%, +0.01%
PreSGPRs: 673675 -> 675057 (+0.21%); split: -0.00%, +0.21%
PreVGPRs: 633017 -> 634768 (+0.28%); split: -0.29%, +0.57%
VALU: 10800351 -> 10712041 (-0.82%); split: -0.82%, +0.00%
SALU: 1752917 -> 1753203 (+0.02%); split: -0.04%, +0.06%

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Marek Olšák <maraeo@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41348>
This commit is contained in:
parent
9e87090db4
commit
1716cbff37
6 changed files with 44 additions and 14 deletions
|
|
@ -68,7 +68,7 @@ traces:
|
|||
checksum: 2832d2190df232bcbde71a4b720303e1
|
||||
gputest/tessmark-v2.trace:
|
||||
gl-radeonsi-stoney:
|
||||
checksum: 609c82248eec789f7389063639f288c3
|
||||
checksum: dd3b850cae31a4d0c007377f67c54757
|
||||
humus/AmbientAperture-v2.trace:
|
||||
gl-radeonsi-stoney:
|
||||
checksum: ed3444722d2468d5306bc734f0c24089
|
||||
|
|
@ -98,13 +98,13 @@ traces:
|
|||
checksum: 4c36bc32bc4512773533bbbea078f65b
|
||||
supertuxkart/supertuxkart-antediluvian-abyss.rdc:
|
||||
gl-radeonsi-stoney:
|
||||
checksum: 6b2e8d20d2774ece96c999ef01533695
|
||||
checksum: f5c2bef676a4b5f25997872b78caa402
|
||||
supertuxkart/supertuxkart-menu.rdc:
|
||||
gl-radeonsi-stoney:
|
||||
checksum: e17f3125683d8b2acac8bc0b4b8e1b44
|
||||
supertuxkart/supertuxkart-ravenbridge-mansion.rdc:
|
||||
gl-radeonsi-stoney:
|
||||
checksum: 3719b5ef32db846570f9b9a3b5ac8705
|
||||
checksum: a535e9a02e01d57938b6b845cbefd0c9
|
||||
ror/ror-default.trace:
|
||||
gl-radeonsi-stoney:
|
||||
checksum: 9ab581db528a53d91f870a064aff9cd4
|
||||
|
|
|
|||
|
|
@ -277,6 +277,9 @@ radv_optimize_nir_algebraic_early(nir_shader *nir)
|
|||
void
|
||||
radv_optimize_nir_algebraic_late(nir_shader *nir)
|
||||
{
|
||||
if (nir->info.stage != MESA_SHADER_VERTEX && nir->info.stage != MESA_SHADER_GEOMETRY)
|
||||
NIR_PASS(_, nir, nir_opt_reassociate_for_fma);
|
||||
|
||||
/* Do late algebraic optimization to turn add(a,
|
||||
* neg(b)) back into subs, then the mandatory cleanup
|
||||
* after algebraic. Note that it may produce fnegs,
|
||||
|
|
|
|||
|
|
@ -6688,6 +6688,7 @@ bool nir_opt_algebraic_late(nir_shader *shader);
|
|||
bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader);
|
||||
bool nir_opt_algebraic_integer_promotion(nir_shader *shader);
|
||||
bool nir_opt_reassociate_matrix_mul(nir_shader *shader);
|
||||
bool nir_opt_reassociate_for_fma(nir_shader *shader);
|
||||
bool nir_opt_constant_folding(nir_shader *shader);
|
||||
|
||||
bool nir_opt_fp_math_ctrl(nir_shader *shader);
|
||||
|
|
|
|||
|
|
@ -4246,6 +4246,23 @@ before_lower_int64_optimizations = [
|
|||
(('iadd', ('u2u64', a), ('u2u64', a)), ('ishl', ('u2u64', a), 1)),
|
||||
]
|
||||
|
||||
reassoc_fma_optimizations = [
|
||||
# Try to reassociate fadd to make more adds have a fmul source
|
||||
(('~fadd', ('fadd(is_used_once)', 'a(is_fmul)', ('fadd(is_used_once)', 'b(is_fmul)', ('fadd(is_used_once)', 'c(is_fmul)', 'd(is_fmul)'))), 'e(is_not_fmul)'),
|
||||
('fadd', a, ('fadd', b, ('fadd', c, ('fadd', d, e))))),
|
||||
(('~fadd', ('fadd(is_used_once)', 'a(is_fmul)', ('fadd(is_used_once)', 'b(is_fmul)', 'c(is_fmul)')), 'd(is_not_fmul)'),
|
||||
('fadd', a, ('fadd', b, ('fadd', c, d)))),
|
||||
(('~fadd', ('fadd(is_used_once)', 'a(is_fmul)', 'b(is_fmul)'), 'c(is_not_fmul)'),
|
||||
('fadd', a, ('fadd', b, c))),
|
||||
|
||||
(('~fadd', ('fneg(is_used_once)', ('fadd(is_used_once)', 'a(is_fmul)', ('fadd(is_used_once)', 'b(is_fmul)', ('fadd(is_used_once)', 'c(is_fmul)', 'd(is_fmul)')))), 'e(is_not_fmul)'),
|
||||
('fadd', ('fneg', a), ('fneg', ('fadd', b, ('fadd', c, ('fadd', d, ('fneg', e))))))),
|
||||
(('~fadd', ('fneg(is_used_once)', ('fadd(is_used_once)', 'a(is_fmul)', ('fadd(is_used_once)', 'b(is_fmul)', 'c(is_fmul)'))), 'd(is_not_fmul)'),
|
||||
('fadd', ('fneg', a), ('fneg', ('fadd', b, ('fadd', c, ('fneg', d)))))),
|
||||
(('~fadd', ('fneg(is_used_once)', ('fadd(is_used_once)', 'a(is_fmul)', 'b(is_fmul)')), 'c(is_not_fmul)'),
|
||||
('fadd', ('fneg', a), ('fadd', ('fneg', b), c))),
|
||||
]
|
||||
|
||||
# These optimizations try to reverse integer promotion found in e.g. OpenCL C. They should be run
|
||||
# before any bit_size lowering is done.
|
||||
integer_promotion_optimizations = []
|
||||
|
|
@ -4321,6 +4338,12 @@ passes.append(nir_algebraic.AlgebraicPass(
|
|||
build_tests=build_tests
|
||||
))
|
||||
|
||||
passes.append(nir_algebraic.AlgebraicPass(
|
||||
"nir_opt_reassociate_for_fma",
|
||||
reassoc_fma_optimizations,
|
||||
build_tests=build_tests
|
||||
))
|
||||
|
||||
if build_tests:
|
||||
with open(args.out_tests, "w", encoding='utf-8') as f:
|
||||
for p in passes:
|
||||
|
|
|
|||
|
|
@ -94,6 +94,9 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool has_arr
|
|||
|
||||
void si_nir_late_opts(nir_shader *nir)
|
||||
{
|
||||
if (nir->info.stage != MESA_SHADER_VERTEX && nir->info.stage != MESA_SHADER_GEOMETRY)
|
||||
NIR_PASS(_, nir, nir_opt_reassociate_for_fma);
|
||||
|
||||
bool more_late_algebraic = true;
|
||||
while (more_late_algebraic) {
|
||||
more_late_algebraic = false;
|
||||
|
|
|
|||
|
|
@ -99,9 +99,9 @@ traces:
|
|||
checksum: dbe1de4e2e812413f173ea6c423117ff
|
||||
text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?"
|
||||
zink-radv-vangogh:
|
||||
checksum: 65e86206d1ba3ed8365f8b95dc9e9e71
|
||||
checksum: b09d4cd898f3bd6629890fc8e7ee5ec1
|
||||
zink-radv-gfx1201:
|
||||
checksum: f22ee8433dd7b689c46816d576bf3c12
|
||||
checksum: 4d38468286742d9bfcd8cfb6118acc8a
|
||||
|
||||
gputest/pixmark-julia-fp32-v2.trace:
|
||||
gl-zink-anv-adl:
|
||||
|
|
@ -153,9 +153,9 @@ traces:
|
|||
gl-zink-anv-tgl:
|
||||
checksum: 04a5a9b7cbc88d8a66c33b5f1c08294d
|
||||
zink-radv-vangogh:
|
||||
checksum: c60118df2c8d52d101c789d919c6d19b
|
||||
checksum: b361dcda8115d96061fd3700c1d5c415
|
||||
zink-radv-gfx1201:
|
||||
checksum: c60118df2c8d52d101c789d919c6d19b
|
||||
checksum: b361dcda8115d96061fd3700c1d5c415
|
||||
|
||||
humus/AmbientAperture-v2.trace:
|
||||
gl-zink-anv-adl:
|
||||
|
|
@ -163,9 +163,9 @@ traces:
|
|||
gl-zink-anv-tgl:
|
||||
checksum: 0f3b7351a84e1e6f15430f8766af4b4c
|
||||
zink-radv-vangogh:
|
||||
checksum: 4c56004f4f5f30c748b1ce5a9f3f882f
|
||||
checksum: 8347dcdc720c4a3799df737f24c10a88
|
||||
zink-radv-gfx1201:
|
||||
checksum: aaa80aa7831b764fb2a24f885eeb70d6
|
||||
checksum: 31566f62731204fe422d22db9c5ba759
|
||||
|
||||
humus/Portals-v2.trace:
|
||||
gl-zink-anv-adl:
|
||||
|
|
@ -333,9 +333,9 @@ traces:
|
|||
checksum: 0af2faa0d9183c1bc4dc7612befe1f0a
|
||||
text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?"
|
||||
zink-radv-vangogh:
|
||||
checksum: 3353b921903f712b919135ebebe3827b
|
||||
checksum: b501ff9ff123d45809d9ce609c9a4d03
|
||||
zink-radv-gfx1201:
|
||||
checksum: 84e632818da5a55a4c67bb5c1ab5d473
|
||||
checksum: 3a4216d4da6a5f17c8659d7cdefad9ad
|
||||
|
||||
supertuxkart/supertuxkart-menu.rdc:
|
||||
gl-zink-anv-adl:
|
||||
|
|
@ -361,9 +361,9 @@ traces:
|
|||
checksum: ca0b64f1a62e01765146be8391eae636
|
||||
text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?"
|
||||
zink-radv-vangogh:
|
||||
checksum: 1301e76c2fb69438e8cfb2f81dcc6611
|
||||
checksum: c824b3bc569c722357ca87ce5079c84f
|
||||
zink-radv-gfx1201:
|
||||
checksum: f01f909e9af079efa388f275526f809f
|
||||
checksum: b3fdac56b33b7ca30df77f517b5b96f7
|
||||
|
||||
unvanquished/unvanquished-lowest.trace:
|
||||
gl-zink-anv-adl:
|
||||
|
|
@ -387,7 +387,7 @@ traces:
|
|||
gl-zink-anv-tgl:
|
||||
checksum: 026dde18e934e7ce3e36eb13ea8e975c
|
||||
zink-radv-vangogh:
|
||||
checksum: 08f44ba93a13220f5e6db3a00e6449ea
|
||||
checksum: 9df8f5c18d8dfc09530071dedcf87120
|
||||
zink-radv-gfx1201:
|
||||
# checksum: 08f44ba93a13220f5e6db3a00e6449ea
|
||||
# checksum: 5078d9c5d272fa231da4d960c90baead
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue