mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-27 06:10:13 +01:00
nir: Optimize fpow with small constant exponents
They would be turned into exp(log(a)*b) instead, which is slow.

Totals from 2146 (2.52% of 85071) affected shaders:
MaxWaves: 35769 -> 35779 (+0.03%); split: +0.03%, -0.01%
Instrs: 6476835 -> 6465494 (-0.18%); split: -0.18%, +0.00%
CodeSize: 35382288 -> 35347092 (-0.10%); split: -0.10%, +0.00%
SpillSGPRs: 1055 -> 1017 (-3.60%)
Latency: 75211743 -> 75063623 (-0.20%); split: -0.20%, +0.00%
InvThroughput: 17525115 -> 17501745 (-0.13%); split: -0.14%, +0.00%
VClause: 200089 -> 200077 (-0.01%); split: -0.01%, +0.01%
SClause: 293566 -> 293480 (-0.03%); split: -0.03%, +0.00%
Copies: 649631 -> 640516 (-1.40%); split: -1.44%, +0.03%
Branches: 268441 -> 268325 (-0.04%)
PreSGPRs: 146868 -> 146045 (-0.56%)
PreVGPRs: 134125 -> 134128 (+0.00%); split: -0.00%, +0.01%

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26727>
This commit is contained in:
parent
8b3496df30
commit
b88ac6b381
5 changed files with 10 additions and 5 deletions
|
|
@ -52,7 +52,7 @@ traces:
|
|||
checksum: 0e7441a7f2c86c75eeac72269786e7a1
|
||||
gputest/gimark-v2.trace:
|
||||
gl-radeonsi-stoney:
|
||||
checksum: 32d373e5ae6cd241c5a7b286c296113f
|
||||
checksum: f2aee98f2b8813ad60d98cf241e42114
|
||||
gputest/pixmark-julia-fp32-v2.trace:
|
||||
gl-radeonsi-stoney:
|
||||
checksum: b5e0ae5ad680d884432f89a923f3e70d
|
||||
|
|
|
|||
|
|
@ -1443,9 +1443,14 @@ optimizations.extend([
|
|||
('~fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) * d) = a^b * c^d
|
||||
(('~fexp2', ('fmul', ('flog2', a), 0.5)), ('fsqrt', a)),
|
||||
(('~fexp2', ('fmul', ('flog2', a), 2.0)), ('fmul', a, a)),
|
||||
(('~fexp2', ('fmul', ('flog2', a), 3.0)), ('fmul', ('fmul', a, a), a)),
|
||||
(('~fexp2', ('fmul', ('flog2', a), 4.0)), ('fmul', ('fmul', a, a), ('fmul', a, a))),
|
||||
(('~fexp2', ('fmul', ('flog2', a), 5.0)), ('fmul', ('fmul', ('fmul', a, a), ('fmul', a, a)), a)),
|
||||
(('~fexp2', ('fmul', ('flog2', a), 6.0)), ('fmul', ('fmul', ('fmul', a, a), ('fmul', a, a)), ('fmul', a, a))),
|
||||
(('~fexp2', ('fmul', ('flog2', a), 8.0)), ('fmul', ('fmul', ('fmul', a, a), ('fmul', a, a)), ('fmul', ('fmul', a, a), ('fmul', a, a)))),
|
||||
(('~fpow', a, 1.0), a),
|
||||
(('~fpow', a, 2.0), ('fmul', a, a)),
|
||||
(('~fpow', a, 3.0), ('fmul', ('fmul', a, a), a)),
|
||||
(('~fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
|
||||
(('~fpow', 2.0, a), ('fexp2', a)),
|
||||
(('~fpow', ('fpow', a, 2.2), 0.454545), a),
|
||||
|
|
|
|||
|
|
@ -344,7 +344,7 @@ traces:
|
|||
text: missing 3/4 of rendered image
|
||||
checksum: e7f01e62180b60aef8c67fc4977c90d1
|
||||
freedreno-a530:
|
||||
checksum: 777ecb806510d9a038a0d06d44deeb6f
|
||||
checksum: c0db49f777798ec7da518bb9eff099c8
|
||||
freedreno-a618:
|
||||
label: [skip]
|
||||
freedreno-a630:
|
||||
|
|
@ -353,7 +353,7 @@ traces:
|
|||
One of the material textures appears brighter on freedreno than
|
||||
i965 in a way that is probably wrong.
|
||||
zink-a618:
|
||||
checksum: ba79e6d4a64b4391d3e9e20d3d630f0a
|
||||
checksum: d4ac6b6dd39f5a63a7b6e0b39f81daa3
|
||||
zink-a630:
|
||||
checksum: e64bcc59d61d1c75ce5eeb109343d9bd
|
||||
|
||||
|
|
|
|||
|
|
@ -111,7 +111,7 @@ traces:
|
|||
checksum: 58a6a276abc0e28fcb2a8acea3342712
|
||||
gputest/pixmark-piano-v2.trace:
|
||||
gl-vmware-llvmpipe:
|
||||
checksum: 11e2a97c14c74e771483ca0d90f9bde3
|
||||
checksum: edc09da55fea262e76686d99548f2cfd
|
||||
gputest/triangle-v2.trace:
|
||||
gl-vmware-llvmpipe:
|
||||
checksum: 7812de00011a3a059892e36cea19c696
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ traces:
|
|||
checksum: 57ddd36b117adc9216c65c10d914a37e
|
||||
gputest/pixmark-piano-v2.trace:
|
||||
gl-virgl:
|
||||
checksum: 0d875bda7edc01698342b157c6f51500
|
||||
checksum: cbe50265c2d1a114fd75bf12407fbad9
|
||||
gputest/triangle-v2.trace:
|
||||
gl-virgl:
|
||||
checksum: 7812de00011a3a059892e36cea19c696
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue