From 1716cbff376ebc8af8d7a57cdc713cebeba22923 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Mon, 4 May 2026 16:57:49 +0200 Subject: [PATCH] nir,amd: reassociate fadd to create more fma/mad MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ACO's backend fusing is quite competent, but it cannot reorder adds. This adds a simple algebraic pass to do that for us. Foz-DB Navi10: Totals from 13568 (18.76% of 72319) affected shaders: MaxWaves: 304722 -> 304004 (-0.24%); split: +0.10%, -0.33% Instrs: 15084252 -> 14993010 (-0.60%); split: -0.61%, +0.00% CodeSize: 81480188 -> 81372600 (-0.13%); split: -0.17%, +0.04% VGPRs: 741580 -> 743680 (+0.28%); split: -0.10%, +0.38% SpillSGPRs: 9418 -> 9434 (+0.17%) Latency: 154602014 -> 154312940 (-0.19%); split: -0.29%, +0.10% InvThroughput: 44628554 -> 44442595 (-0.42%); split: -0.47%, +0.05% VClause: 300035 -> 300054 (+0.01%); split: -0.31%, +0.31% SClause: 370992 -> 370640 (-0.09%); split: -0.15%, +0.06% Copies: 1162401 -> 1162800 (+0.03%); split: -0.30%, +0.33% Branches: 300646 -> 300654 (+0.00%); split: -0.00%, +0.01% PreSGPRs: 673675 -> 675057 (+0.21%); split: -0.00%, +0.21% PreVGPRs: 633017 -> 634768 (+0.28%); split: -0.29%, +0.57% VALU: 10800351 -> 10712041 (-0.82%); split: -0.82%, +0.00% SALU: 1752917 -> 1753203 (+0.02%); split: -0.04%, +0.06% Reviewed-by: Daniel Schürmann Reviewed-by: Marek Olšák Part-of: --- src/amd/ci/traces-amd.yml | 6 ++--- src/amd/vulkan/radv_shader.c | 3 +++ src/compiler/nir/nir.h | 1 + src/compiler/nir/nir_opt_algebraic.py | 23 +++++++++++++++++++ .../drivers/radeonsi/gfx/si_shader_nir.c | 3 +++ src/gallium/drivers/zink/ci/traces-zink.yml | 22 +++++++++--------- 6 files changed, 44 insertions(+), 14 deletions(-) diff --git a/src/amd/ci/traces-amd.yml b/src/amd/ci/traces-amd.yml index cd25c3f6d96..8178eca656a 100644 --- a/src/amd/ci/traces-amd.yml +++ b/src/amd/ci/traces-amd.yml @@ -68,7 +68,7 @@ traces: checksum: 2832d2190df232bcbde71a4b720303e1 gputest/tessmark-v2.trace: gl-radeonsi-stoney: - checksum: 609c82248eec789f7389063639f288c3 + checksum: dd3b850cae31a4d0c007377f67c54757 humus/AmbientAperture-v2.trace: gl-radeonsi-stoney: checksum: ed3444722d2468d5306bc734f0c24089 @@ -98,13 +98,13 @@ traces: checksum: 4c36bc32bc4512773533bbbea078f65b supertuxkart/supertuxkart-antediluvian-abyss.rdc: gl-radeonsi-stoney: - checksum: 6b2e8d20d2774ece96c999ef01533695 + checksum: f5c2bef676a4b5f25997872b78caa402 supertuxkart/supertuxkart-menu.rdc: gl-radeonsi-stoney: checksum: e17f3125683d8b2acac8bc0b4b8e1b44 supertuxkart/supertuxkart-ravenbridge-mansion.rdc: gl-radeonsi-stoney: - checksum: 3719b5ef32db846570f9b9a3b5ac8705 + checksum: a535e9a02e01d57938b6b845cbefd0c9 ror/ror-default.trace: gl-radeonsi-stoney: checksum: 9ab581db528a53d91f870a064aff9cd4 diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 04c924c0d14..5d623950c10 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -277,6 +277,9 @@ radv_optimize_nir_algebraic_early(nir_shader *nir) void radv_optimize_nir_algebraic_late(nir_shader *nir) { + if (nir->info.stage != MESA_SHADER_VERTEX && nir->info.stage != MESA_SHADER_GEOMETRY) + NIR_PASS(_, nir, nir_opt_reassociate_for_fma); + /* Do late algebraic optimization to turn add(a, * neg(b)) back into subs, then the mandatory cleanup * after algebraic. Note that it may produce fnegs, diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index f79f5104af4..2ede4c42f7c 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -6688,6 +6688,7 @@ bool nir_opt_algebraic_late(nir_shader *shader); bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader); bool nir_opt_algebraic_integer_promotion(nir_shader *shader); bool nir_opt_reassociate_matrix_mul(nir_shader *shader); +bool nir_opt_reassociate_for_fma(nir_shader *shader); bool nir_opt_constant_folding(nir_shader *shader); bool nir_opt_fp_math_ctrl(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 9d0cd043504..135067d934c 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -4246,6 +4246,23 @@ before_lower_int64_optimizations = [ (('iadd', ('u2u64', a), ('u2u64', a)), ('ishl', ('u2u64', a), 1)), ] +reassoc_fma_optimizations = [ + # Try to reassociate fadd to make more adds have a fmul source + (('~fadd', ('fadd(is_used_once)', 'a(is_fmul)', ('fadd(is_used_once)', 'b(is_fmul)', ('fadd(is_used_once)', 'c(is_fmul)', 'd(is_fmul)'))), 'e(is_not_fmul)'), + ('fadd', a, ('fadd', b, ('fadd', c, ('fadd', d, e))))), + (('~fadd', ('fadd(is_used_once)', 'a(is_fmul)', ('fadd(is_used_once)', 'b(is_fmul)', 'c(is_fmul)')), 'd(is_not_fmul)'), + ('fadd', a, ('fadd', b, ('fadd', c, d)))), + (('~fadd', ('fadd(is_used_once)', 'a(is_fmul)', 'b(is_fmul)'), 'c(is_not_fmul)'), + ('fadd', a, ('fadd', b, c))), + + (('~fadd', ('fneg(is_used_once)', ('fadd(is_used_once)', 'a(is_fmul)', ('fadd(is_used_once)', 'b(is_fmul)', ('fadd(is_used_once)', 'c(is_fmul)', 'd(is_fmul)')))), 'e(is_not_fmul)'), + ('fadd', ('fneg', a), ('fneg', ('fadd', b, ('fadd', c, ('fadd', d, ('fneg', e))))))), + (('~fadd', ('fneg(is_used_once)', ('fadd(is_used_once)', 'a(is_fmul)', ('fadd(is_used_once)', 'b(is_fmul)', 'c(is_fmul)'))), 'd(is_not_fmul)'), + ('fadd', ('fneg', a), ('fneg', ('fadd', b, ('fadd', c, ('fneg', d)))))), + (('~fadd', ('fneg(is_used_once)', ('fadd(is_used_once)', 'a(is_fmul)', 'b(is_fmul)')), 'c(is_not_fmul)'), + ('fadd', ('fneg', a), ('fadd', ('fneg', b), c))), +] + # Those optimizations try to reverse integer promotion found in e.g. OpenCL C. Those should be ran # before any bit_size lowering is done. integer_promotion_optimizations = [] @@ -4321,6 +4338,12 @@ passes.append(nir_algebraic.AlgebraicPass( build_tests=build_tests )) +passes.append(nir_algebraic.AlgebraicPass( + "nir_opt_reassociate_for_fma", + reassoc_fma_optimizations, + build_tests=build_tests +)) + if build_tests: with open(args.out_tests, "w", encoding='utf-8') as f: for p in passes: diff --git a/src/gallium/drivers/radeonsi/gfx/si_shader_nir.c b/src/gallium/drivers/radeonsi/gfx/si_shader_nir.c index 6a0f03685ed..9b0d7cecc2a 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/gfx/si_shader_nir.c @@ -94,6 +94,9 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool has_arr void si_nir_late_opts(nir_shader *nir) { + if (nir->info.stage != MESA_SHADER_VERTEX && nir->info.stage != MESA_SHADER_GEOMETRY) + NIR_PASS(_, nir, nir_opt_reassociate_for_fma); + bool more_late_algebraic = true; while (more_late_algebraic) { more_late_algebraic = false; diff --git a/src/gallium/drivers/zink/ci/traces-zink.yml b/src/gallium/drivers/zink/ci/traces-zink.yml index 18ad215e443..6d26057c633 100644 --- a/src/gallium/drivers/zink/ci/traces-zink.yml +++ b/src/gallium/drivers/zink/ci/traces-zink.yml @@ -99,9 +99,9 @@ traces: checksum: dbe1de4e2e812413f173ea6c423117ff text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?" zink-radv-vangogh: - checksum: 65e86206d1ba3ed8365f8b95dc9e9e71 + checksum: b09d4cd898f3bd6629890fc8e7ee5ec1 zink-radv-gfx1201: - checksum: f22ee8433dd7b689c46816d576bf3c12 + checksum: 4d38468286742d9bfcd8cfb6118acc8a gputest/pixmark-julia-fp32-v2.trace: gl-zink-anv-adl: @@ -153,9 +153,9 @@ traces: gl-zink-anv-tgl: checksum: 04a5a9b7cbc88d8a66c33b5f1c08294d zink-radv-vangogh: - checksum: c60118df2c8d52d101c789d919c6d19b + checksum: b361dcda8115d96061fd3700c1d5c415 zink-radv-gfx1201: - checksum: c60118df2c8d52d101c789d919c6d19b + checksum: b361dcda8115d96061fd3700c1d5c415 humus/AmbientAperture-v2.trace: gl-zink-anv-adl: @@ -163,9 +163,9 @@ traces: gl-zink-anv-tgl: checksum: 0f3b7351a84e1e6f15430f8766af4b4c zink-radv-vangogh: - checksum: 4c56004f4f5f30c748b1ce5a9f3f882f + checksum: 8347dcdc720c4a3799df737f24c10a88 zink-radv-gfx1201: - checksum: aaa80aa7831b764fb2a24f885eeb70d6 + checksum: 31566f62731204fe422d22db9c5ba759 humus/Portals-v2.trace: gl-zink-anv-adl: @@ -333,9 +333,9 @@ traces: checksum: 0af2faa0d9183c1bc4dc7612befe1f0a text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?" zink-radv-vangogh: - checksum: 3353b921903f712b919135ebebe3827b + checksum: b501ff9ff123d45809d9ce609c9a4d03 zink-radv-gfx1201: - checksum: 84e632818da5a55a4c67bb5c1ab5d473 + checksum: 3a4216d4da6a5f17c8659d7cdefad9ad supertuxkart/supertuxkart-menu.rdc: gl-zink-anv-adl: @@ -361,9 +361,9 @@ traces: checksum: ca0b64f1a62e01765146be8391eae636 text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?" zink-radv-vangogh: - checksum: 1301e76c2fb69438e8cfb2f81dcc6611 + checksum: c824b3bc569c722357ca87ce5079c84f zink-radv-gfx1201: - checksum: f01f909e9af079efa388f275526f809f + checksum: b3fdac56b33b7ca30df77f517b5b96f7 unvanquished/unvanquished-lowest.trace: gl-zink-anv-adl: @@ -387,7 +387,7 @@ traces: gl-zink-anv-tgl: checksum: 026dde18e934e7ce3e36eb13ea8e975c zink-radv-vangogh: - checksum: 08f44ba93a13220f5e6db3a00e6449ea + checksum: 9df8f5c18d8dfc09530071dedcf87120 zink-radv-gfx1201: # checksum: 08f44ba93a13220f5e6db3a00e6449ea # checksum: 5078d9c5d272fa231da4d960c90baead