From 24d35a56d99ccc9ff23d4aa058f857652fbc31f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= Date: Mon, 17 Jul 2023 13:17:04 +0200 Subject: [PATCH] r300: reorder for easier presubtract 1-x pattern recognition It is much easier to just add a simple late algebraic pass than actually trying to teach the backend to recognize all the different patterns. RV530 shader-db: total instructions in shared programs: 129643 -> 129468 (-0.13%) instructions in affected programs: 17665 -> 17490 (-0.99%) helped: 176 HURT: 39 total presub in shared programs: 4912 -> 5411 (10.16%) presub in affected programs: 1651 -> 2150 (30.22%) helped: 0 HURT: 287 total temps in shared programs: 16904 -> 16918 (0.08%) temps in affected programs: 812 -> 826 (1.72%) helped: 25 HURT: 37 total cycles in shared programs: 194771 -> 194675 (-0.05%) cycles in affected programs: 28096 -> 28000 (-0.34%) helped: 146 HURT: 41 Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9364 Reviewed-by: Filip Gawin Part-of: --- src/gallium/drivers/r300/compiler/nir_to_rc.c | 6 +++++ src/gallium/drivers/r300/compiler/r300_nir.h | 4 ++++ .../r300/compiler/r300_nir_algebraic.py | 22 +++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/src/gallium/drivers/r300/compiler/nir_to_rc.c b/src/gallium/drivers/r300/compiler/nir_to_rc.c index 988f13da416..7a783a1cefb 100644 --- a/src/gallium/drivers/r300/compiler/nir_to_rc.c +++ b/src/gallium/drivers/r300/compiler/nir_to_rc.c @@ -26,6 +26,7 @@ #include "compiler/nir/nir_legacy.h" #include "compiler/nir/nir_worklist.h" #include "nir_to_rc.h" +#include "r300_nir.h" #include "pipe/p_screen.h" #include "pipe/p_state.h" #include "tgsi/tgsi_dump.h" @@ -2433,6 +2434,11 @@ const void *nir_to_rc_options(struct nir_shader *s, } } while (progress); + if (s->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS_V(s, r300_nir_prepare_presubtract); + NIR_PASS_V(s, r300_nir_clean_double_fneg); + } + NIR_PASS_V(s, nir_lower_int_to_float); NIR_PASS_V(s, 
nir_lower_bool_to_float, !options->lower_cmp && !options->lower_fabs); diff --git a/src/gallium/drivers/r300/compiler/r300_nir.h b/src/gallium/drivers/r300/compiler/r300_nir.h index 111bf84e8d0..951e0a8cdbd 100644 --- a/src/gallium/drivers/r300/compiler/r300_nir.h +++ b/src/gallium/drivers/r300/compiler/r300_nir.h @@ -36,4 +36,8 @@ extern bool r300_nir_fuse_fround_d3d9(struct nir_shader *shader); extern bool r300_nir_lower_bool_to_float(struct nir_shader *shader); +extern bool r300_nir_prepare_presubtract(struct nir_shader *shader); + +extern bool r300_nir_clean_double_fneg(struct nir_shader *shader); + #endif /* R300_NIR_H */ diff --git a/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py b/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py index c4ce7d22722..7967f14f4f2 100644 --- a/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py +++ b/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py @@ -63,6 +63,22 @@ r300_nir_fuse_fround_d3d9 = [ ('fround_even', 'a')) ] +# Here are some specific optimizations for code reordering so that the backend +# has an easier time recognizing output modifiers and presubtract patterns. +r300_nir_prepare_presubtract = [ + # The backend can only recognize the 1 - x pattern. + (('fadd', ('fneg', a), 1.0), ('fadd', 1.0, ('fneg', a))), + (('fadd', a, -1.0), ('fneg', ('fadd', 1.0, ('fneg', a)))), + (('fadd', -1.0, a), ('fneg', ('fadd', 1.0, ('fneg', a)))), +] + +# The previous prepare_presubtract pass can sometimes produce double fneg patterns. +# The backend copy propagation could handle it, but the nir to tgsi translation +# does not and blows up. Just run a simple pass to clean it up.
+r300_nir_clean_double_fneg = [ + (('fneg', ('fneg', a)), a) +] + def main(): parser = argparse.ArgumentParser() parser.add_argument('-p', '--import-path', required=True) @@ -103,5 +119,11 @@ def main(): f.write(nir_algebraic.AlgebraicPass("r300_nir_lower_bool_to_float", r300_nir_lower_bool_to_float).render()) + f.write(nir_algebraic.AlgebraicPass("r300_nir_prepare_presubtract", + r300_nir_prepare_presubtract).render()) + + f.write(nir_algebraic.AlgebraicPass("r300_nir_clean_double_fneg", + r300_nir_clean_double_fneg).render()) + if __name__ == '__main__': main()