diff --git a/src/amd/ci/traces-amd.yml b/src/amd/ci/traces-amd.yml index cd25c3f6d96..8178eca656a 100644 --- a/src/amd/ci/traces-amd.yml +++ b/src/amd/ci/traces-amd.yml @@ -68,7 +68,7 @@ traces: checksum: 2832d2190df232bcbde71a4b720303e1 gputest/tessmark-v2.trace: gl-radeonsi-stoney: - checksum: 609c82248eec789f7389063639f288c3 + checksum: dd3b850cae31a4d0c007377f67c54757 humus/AmbientAperture-v2.trace: gl-radeonsi-stoney: checksum: ed3444722d2468d5306bc734f0c24089 @@ -98,13 +98,13 @@ traces: checksum: 4c36bc32bc4512773533bbbea078f65b supertuxkart/supertuxkart-antediluvian-abyss.rdc: gl-radeonsi-stoney: - checksum: 6b2e8d20d2774ece96c999ef01533695 + checksum: f5c2bef676a4b5f25997872b78caa402 supertuxkart/supertuxkart-menu.rdc: gl-radeonsi-stoney: checksum: e17f3125683d8b2acac8bc0b4b8e1b44 supertuxkart/supertuxkart-ravenbridge-mansion.rdc: gl-radeonsi-stoney: - checksum: 3719b5ef32db846570f9b9a3b5ac8705 + checksum: a535e9a02e01d57938b6b845cbefd0c9 ror/ror-default.trace: gl-radeonsi-stoney: checksum: 9ab581db528a53d91f870a064aff9cd4 diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 04c924c0d14..5d623950c10 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -277,6 +277,9 @@ radv_optimize_nir_algebraic_early(nir_shader *nir) void radv_optimize_nir_algebraic_late(nir_shader *nir) { + if (nir->info.stage != MESA_SHADER_VERTEX && nir->info.stage != MESA_SHADER_GEOMETRY) + NIR_PASS(_, nir, nir_opt_reassociate_for_fma); + /* Do late algebraic optimization to turn add(a, * neg(b)) back into subs, then the mandatory cleanup * after algebraic. Note that it may produce fnegs, diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index f79f5104af4..2ede4c42f7c 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -6688,6 +6688,7 @@ bool nir_opt_algebraic_late(nir_shader *shader); bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader); bool nir_opt_algebraic_integer_promotion(nir_shader *shader); bool nir_opt_reassociate_matrix_mul(nir_shader *shader); +bool nir_opt_reassociate_for_fma(nir_shader *shader); bool nir_opt_constant_folding(nir_shader *shader); bool nir_opt_fp_math_ctrl(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 9d0cd043504..135067d934c 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -4246,6 +4246,23 @@ before_lower_int64_optimizations = [ (('iadd', ('u2u64', a), ('u2u64', a)), ('ishl', ('u2u64', a), 1)), ] +reassoc_fma_optimizations = [ + # Try to reassociate fadd to make more adds have a fmul source + (('~fadd', ('fadd(is_used_once)', 'a(is_fmul)', ('fadd(is_used_once)', 'b(is_fmul)', ('fadd(is_used_once)', 'c(is_fmul)', 'd(is_fmul)'))), 'e(is_not_fmul)'), + ('fadd', a, ('fadd', b, ('fadd', c, ('fadd', d, e))))), + (('~fadd', ('fadd(is_used_once)', 'a(is_fmul)', ('fadd(is_used_once)', 'b(is_fmul)', 'c(is_fmul)')), 'd(is_not_fmul)'), + ('fadd', a, ('fadd', b, ('fadd', c, d)))), + (('~fadd', ('fadd(is_used_once)', 'a(is_fmul)', 'b(is_fmul)'), 'c(is_not_fmul)'), + ('fadd', a, ('fadd', b, c))), + + (('~fadd', ('fneg(is_used_once)', ('fadd(is_used_once)', 'a(is_fmul)', ('fadd(is_used_once)', 'b(is_fmul)', ('fadd(is_used_once)', 'c(is_fmul)', 'd(is_fmul)')))), 'e(is_not_fmul)'), + ('fadd', ('fneg', a), ('fneg', ('fadd', b, ('fadd', c, ('fadd', d, ('fneg', e))))))), + (('~fadd', ('fneg(is_used_once)', ('fadd(is_used_once)', 'a(is_fmul)', ('fadd(is_used_once)', 'b(is_fmul)', 'c(is_fmul)'))), 'd(is_not_fmul)'), + ('fadd', ('fneg', a), ('fneg', ('fadd', b, ('fadd', c, ('fneg', d)))))), + (('~fadd', ('fneg(is_used_once)', ('fadd(is_used_once)', 'a(is_fmul)', 'b(is_fmul)')), 'c(is_not_fmul)'), + ('fadd', ('fneg', a), ('fadd', ('fneg', b), c))), +] + # Those optimizations try to reverse integer promotion found in e.g. OpenCL C. Those should be ran # before any bit_size lowering is done. integer_promotion_optimizations = [] @@ -4321,6 +4338,12 @@ passes.append(nir_algebraic.AlgebraicPass( build_tests=build_tests )) +passes.append(nir_algebraic.AlgebraicPass( + "nir_opt_reassociate_for_fma", + reassoc_fma_optimizations, + build_tests=build_tests +)) + if build_tests: with open(args.out_tests, "w", encoding='utf-8') as f: for p in passes: diff --git a/src/gallium/drivers/radeonsi/gfx/si_shader_nir.c b/src/gallium/drivers/radeonsi/gfx/si_shader_nir.c index 6a0f03685ed..9b0d7cecc2a 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/gfx/si_shader_nir.c @@ -94,6 +94,9 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool has_arr void si_nir_late_opts(nir_shader *nir) { + if (nir->info.stage != MESA_SHADER_VERTEX && nir->info.stage != MESA_SHADER_GEOMETRY) + NIR_PASS(_, nir, nir_opt_reassociate_for_fma); + bool more_late_algebraic = true; while (more_late_algebraic) { more_late_algebraic = false; diff --git a/src/gallium/drivers/zink/ci/traces-zink.yml b/src/gallium/drivers/zink/ci/traces-zink.yml index 18ad215e443..6d26057c633 100644 --- a/src/gallium/drivers/zink/ci/traces-zink.yml +++ b/src/gallium/drivers/zink/ci/traces-zink.yml @@ -99,9 +99,9 @@ traces: checksum: dbe1de4e2e812413f173ea6c423117ff text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?" zink-radv-vangogh: - checksum: 65e86206d1ba3ed8365f8b95dc9e9e71 + checksum: b09d4cd898f3bd6629890fc8e7ee5ec1 zink-radv-gfx1201: - checksum: f22ee8433dd7b689c46816d576bf3c12 + checksum: 4d38468286742d9bfcd8cfb6118acc8a gputest/pixmark-julia-fp32-v2.trace: gl-zink-anv-adl: @@ -153,9 +153,9 @@ traces: gl-zink-anv-tgl: checksum: 04a5a9b7cbc88d8a66c33b5f1c08294d zink-radv-vangogh: - checksum: c60118df2c8d52d101c789d919c6d19b + checksum: b361dcda8115d96061fd3700c1d5c415 zink-radv-gfx1201: - checksum: c60118df2c8d52d101c789d919c6d19b + checksum: b361dcda8115d96061fd3700c1d5c415 humus/AmbientAperture-v2.trace: gl-zink-anv-adl: @@ -163,9 +163,9 @@ traces: gl-zink-anv-tgl: checksum: 0f3b7351a84e1e6f15430f8766af4b4c zink-radv-vangogh: - checksum: 4c56004f4f5f30c748b1ce5a9f3f882f + checksum: 8347dcdc720c4a3799df737f24c10a88 zink-radv-gfx1201: - checksum: aaa80aa7831b764fb2a24f885eeb70d6 + checksum: 31566f62731204fe422d22db9c5ba759 humus/Portals-v2.trace: gl-zink-anv-adl: @@ -333,9 +333,9 @@ traces: checksum: 0af2faa0d9183c1bc4dc7612befe1f0a text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?" zink-radv-vangogh: - checksum: 3353b921903f712b919135ebebe3827b + checksum: b501ff9ff123d45809d9ce609c9a4d03 zink-radv-gfx1201: - checksum: 84e632818da5a55a4c67bb5c1ab5d473 + checksum: 3a4216d4da6a5f17c8659d7cdefad9ad supertuxkart/supertuxkart-menu.rdc: gl-zink-anv-adl: @@ -361,9 +361,9 @@ traces: checksum: ca0b64f1a62e01765146be8391eae636 text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?" zink-radv-vangogh: - checksum: 1301e76c2fb69438e8cfb2f81dcc6611 + checksum: c824b3bc569c722357ca87ce5079c84f zink-radv-gfx1201: - checksum: f01f909e9af079efa388f275526f809f + checksum: b3fdac56b33b7ca30df77f517b5b96f7 unvanquished/unvanquished-lowest.trace: gl-zink-anv-adl: @@ -387,7 +387,7 @@ traces: gl-zink-anv-tgl: checksum: 026dde18e934e7ce3e36eb13ea8e975c zink-radv-vangogh: - checksum: 08f44ba93a13220f5e6db3a00e6449ea + checksum: 9df8f5c18d8dfc09530071dedcf87120 zink-radv-gfx1201: # checksum: 08f44ba93a13220f5e6db3a00e6449ea # checksum: 5078d9c5d272fa231da4d960c90baead