From b88ac6b381c8648ec9fcded61aaa09077ed394c7 Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Fri, 1 Dec 2023 18:44:44 +0100 Subject: [PATCH] nir: Optimize fpow with small constant exponents They would be turned into exp(log(a)*b) instead, which is slow. Totals from 2146 (2.52% of 85071) affected shaders: MaxWaves: 35769 -> 35779 (+0.03%); split: +0.03%, -0.01% Instrs: 6476835 -> 6465494 (-0.18%); split: -0.18%, +0.00% CodeSize: 35382288 -> 35347092 (-0.10%); split: -0.10%, +0.00% SpillSGPRs: 1055 -> 1017 (-3.60%) Latency: 75211743 -> 75063623 (-0.20%); split: -0.20%, +0.00% InvThroughput: 17525115 -> 17501745 (-0.13%); split: -0.14%, +0.00% VClause: 200089 -> 200077 (-0.01%); split: -0.01%, +0.01% SClause: 293566 -> 293480 (-0.03%); split: -0.03%, +0.00% Copies: 649631 -> 640516 (-1.40%); split: -1.44%, +0.03% Branches: 268441 -> 268325 (-0.04%) PreSGPRs: 146868 -> 146045 (-0.56%) PreVGPRs: 134125 -> 134128 (+0.00%); split: -0.00%, +0.01% Reviewed-by: Georg Lehmann Reviewed-by: Faith Ekstrand Part-of: --- src/amd/ci/traces-amd.yml | 2 +- src/compiler/nir/nir_opt_algebraic.py | 5 +++++ src/freedreno/ci/traces-freedreno.yml | 4 ++-- src/gallium/drivers/llvmpipe/ci/traces-llvmpipe.yml | 2 +- src/gallium/drivers/virgl/ci/traces-virgl.yml | 2 +- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/amd/ci/traces-amd.yml b/src/amd/ci/traces-amd.yml index dd107f076a6..5bb4ea4290f 100644 --- a/src/amd/ci/traces-amd.yml +++ b/src/amd/ci/traces-amd.yml @@ -52,7 +52,7 @@ traces: checksum: 0e7441a7f2c86c75eeac72269786e7a1 gputest/gimark-v2.trace: gl-radeonsi-stoney: - checksum: 32d373e5ae6cd241c5a7b286c296113f + checksum: f2aee98f2b8813ad60d98cf241e42114 gputest/pixmark-julia-fp32-v2.trace: gl-radeonsi-stoney: checksum: b5e0ae5ad680d884432f89a923f3e70d diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 89d51b17072..88293c995c0 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1443,9 +1443,14 @@ optimizations.extend([ ('~fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) + d) = a^b * c^d (('~fexp2', ('fmul', ('flog2', a), 0.5)), ('fsqrt', a)), (('~fexp2', ('fmul', ('flog2', a), 2.0)), ('fmul', a, a)), + (('~fexp2', ('fmul', ('flog2', a), 3.0)), ('fmul', ('fmul', a, a), a)), (('~fexp2', ('fmul', ('flog2', a), 4.0)), ('fmul', ('fmul', a, a), ('fmul', a, a))), + (('~fexp2', ('fmul', ('flog2', a), 5.0)), ('fmul', ('fmul', ('fmul', a, a), ('fmul', a, a)), a)), + (('~fexp2', ('fmul', ('flog2', a), 6.0)), ('fmul', ('fmul', ('fmul', a, a), ('fmul', a, a)), ('fmul', a, a))), + (('~fexp2', ('fmul', ('flog2', a), 8.0)), ('fmul', ('fmul', ('fmul', a, a), ('fmul', a, a)), ('fmul', ('fmul', a, a), ('fmul', a, a)))), (('~fpow', a, 1.0), a), (('~fpow', a, 2.0), ('fmul', a, a)), + (('~fpow', a, 3.0), ('fmul', ('fmul', a, a), a)), (('~fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))), (('~fpow', 2.0, a), ('fexp2', a)), (('~fpow', ('fpow', a, 2.2), 0.454545), a), diff --git a/src/freedreno/ci/traces-freedreno.yml b/src/freedreno/ci/traces-freedreno.yml index 93efb299bcf..824aef2b39a 100644 --- a/src/freedreno/ci/traces-freedreno.yml +++ b/src/freedreno/ci/traces-freedreno.yml @@ -344,7 +344,7 @@ traces: text: missing 3/4 of rendered image checksum: e7f01e62180b60aef8c67fc4977c90d1 freedreno-a530: - checksum: 777ecb806510d9a038a0d06d44deeb6f + checksum: c0db49f777798ec7da518bb9eff099c8 freedreno-a618: label: [skip] freedreno-a630: @@ -353,7 +353,7 @@ traces: One of the material textures appears brighter on freedreno than i965 in a way that is probably wrong. zink-a618: - checksum: ba79e6d4a64b4391d3e9e20d3d630f0a + checksum: d4ac6b6dd39f5a63a7b6e0b39f81daa3 zink-a630: checksum: e64bcc59d61d1c75ce5eeb109343d9bd diff --git a/src/gallium/drivers/llvmpipe/ci/traces-llvmpipe.yml b/src/gallium/drivers/llvmpipe/ci/traces-llvmpipe.yml index fd85705189a..57a0bfda02a 100644 --- a/src/gallium/drivers/llvmpipe/ci/traces-llvmpipe.yml +++ b/src/gallium/drivers/llvmpipe/ci/traces-llvmpipe.yml @@ -111,7 +111,7 @@ traces: checksum: 58a6a276abc0e28fcb2a8acea3342712 gputest/pixmark-piano-v2.trace: gl-vmware-llvmpipe: - checksum: 11e2a97c14c74e771483ca0d90f9bde3 + checksum: edc09da55fea262e76686d99548f2cfd gputest/triangle-v2.trace: gl-vmware-llvmpipe: checksum: 7812de00011a3a059892e36cea19c696 diff --git a/src/gallium/drivers/virgl/ci/traces-virgl.yml b/src/gallium/drivers/virgl/ci/traces-virgl.yml index df562f1f926..bf5fcc8539b 100644 --- a/src/gallium/drivers/virgl/ci/traces-virgl.yml +++ b/src/gallium/drivers/virgl/ci/traces-virgl.yml @@ -12,7 +12,7 @@ traces: checksum: 57ddd36b117adc9216c65c10d914a37e gputest/pixmark-piano-v2.trace: gl-virgl: - checksum: 0d875bda7edc01698342b157c6f51500 + checksum: cbe50265c2d1a114fd75bf12407fbad9 gputest/triangle-v2.trace: gl-virgl: checksum: 7812de00011a3a059892e36cea19c696