mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-28 10:20:09 +01:00
nir/algebraic: add various ffma optimizations
fossil-db (GFX10.3): Totals from 7532 (5.15% of 146267) affected shaders: VGPRs: 414696 -> 414304 (-0.09%); split: -0.18%, +0.08% CodeSize: 33393444 -> 33375908 (-0.05%); split: -0.13%, +0.08% MaxWaves: 149854 -> 150094 (+0.16%); split: +0.27%, -0.11% Instrs: 6279823 -> 6271364 (-0.13%); split: -0.18%, +0.05% Latency: 60308898 -> 60296025 (-0.02%); split: -0.13%, +0.11% InvThroughput: 13770542 -> 13745192 (-0.18%); split: -0.24%, +0.06% fossil-db (GFX10): Totals from 7532 (5.15% of 146267) affected shaders: VGPRs: 406664 -> 405564 (-0.27%); split: -0.39%, +0.12% CodeSize: 33544656 -> 33527568 (-0.05%); split: -0.13%, +0.08% MaxWaves: 158584 -> 158858 (+0.17%); split: +0.30%, -0.13% Instrs: 6316242 -> 6307913 (-0.13%); split: -0.18%, +0.05% Latency: 60243290 -> 60232844 (-0.02%); split: -0.13%, +0.11% InvThroughput: 13643345 -> 13620171 (-0.17%); split: -0.24%, +0.07% fossil-db (GFX9): Totals from 7543 (5.15% of 146401) affected shaders: SGPRs: 546384 -> 547472 (+0.20%); split: -0.08%, +0.28% VGPRs: 412636 -> 411896 (-0.18%); split: -0.27%, +0.09% CodeSize: 33216196 -> 33210564 (-0.02%); split: -0.12%, +0.11% MaxWaves: 38771 -> 38789 (+0.05%); split: +0.17%, -0.12% Instrs: 6419878 -> 6414891 (-0.08%); split: -0.18%, +0.11% Latency: 70972327 -> 70922754 (-0.07%); split: -0.15%, +0.08% InvThroughput: 33949039 -> 33909258 (-0.12%); split: -0.20%, +0.08% Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8056>
This commit is contained in:
parent
b0238191ca
commit
110bcb4919
1 changed files with 18 additions and 0 deletions
|
|
@ -141,6 +141,10 @@ optimizations = [
|
|||
(('usadd_4x8_vc4', a, 0), a),
|
||||
(('usadd_4x8_vc4', a, ~0), ~0),
|
||||
(('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
|
||||
(('~ffma', a, b, ('ffma(is_used_once)', a, c, d)), ('ffma', a, ('fadd', b, c), d)),
|
||||
(('~ffma', a, b, ('fmul(is_used_once)', a, c)), ('fmul', a, ('fadd', b, c))),
|
||||
(('~fadd', ('fmul(is_used_once)', a, b), ('ffma(is_used_once)', a, c, d)), ('ffma', a, ('fadd', b, c), d)),
|
||||
(('~ffma', a, ('fmul(is_used_once)', b, c), ('fmul(is_used_once)', b, d)), ('fmul', b, ('ffma', a, c, d))),
|
||||
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
|
||||
(('iand', ('ior', a, b), ('ior', a, c)), ('ior', a, ('iand', b, c))),
|
||||
(('ior', ('iand', a, b), ('iand', a, c)), ('iand', a, ('ior', b, c))),
|
||||
|
|
@ -180,6 +184,7 @@ optimizations = [
|
|||
(('ffma@32', a, b, 0.0), ('fmul', a, b), '!'+signed_zero_inf_nan_preserve_32),
|
||||
(('ffma', 1.0, a, b), ('fadd', a, b)),
|
||||
(('ffma', -1.0, a, b), ('fadd', ('fneg', a), b)),
|
||||
(('~ffma', '#a', '#b', c), ('fadd', ('fmul', a, b), c)),
|
||||
(('~flrp', a, b, 0.0), a),
|
||||
(('~flrp', a, b, 1.0), b),
|
||||
(('~flrp', a, a, b), a),
|
||||
|
|
@ -207,6 +212,13 @@ for s in [16, 32, 64]:
|
|||
(('~fadd@{}'.format(s), ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', 'c@1')))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp{}'.format(s)),
|
||||
(('~fadd@{}'.format(s), a, ('fmul', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp{}'.format(s)),
|
||||
|
||||
(('~ffma@{}'.format(s), a, ('fadd', 1.0, ('fneg', ('b2f', 'c@1'))), ('fmul', b, ('b2f', 'c@1'))), ('bcsel', c, b, a)),
|
||||
(('~ffma@{}'.format(s), b, ('b2f', 'c@1'), ('ffma', ('fneg', a), ('b2f', 'c@1'), a)), ('bcsel', c, b, a)),
|
||||
|
||||
# These two aren't flrp lowerings, but do appear in some shaders.
|
||||
(('~ffma@{}'.format(s), ('b2f', 'c@1'), ('fadd', b, ('fneg', a)), a), ('bcsel', c, b, a)),
|
||||
(('~ffma@{}'.format(s), ('b2f', 'c@1'), ('ffma', ('fneg', a), b, d), ('fmul', a, b)), ('bcsel', c, d, ('fmul', a, b))),
|
||||
|
||||
# 1 - ((1 - a) * (1 - b))
|
||||
# 1 - (1 - a - b + a*b)
|
||||
# 1 - 1 + a + b - a*b
|
||||
|
|
@ -1336,23 +1348,29 @@ optimizations.extend([
|
|||
|
||||
# Propagate negation up multiplication chains
|
||||
(('fmul(is_used_by_non_fsat)', ('fneg', a), b), ('fneg', ('fmul', a, b))),
|
||||
(('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)),
|
||||
(('imul', ('ineg', a), b), ('ineg', ('imul', a, b))),
|
||||
|
||||
# Propagate constants up multiplication chains
|
||||
(('~fmul(is_used_once)', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmul', ('fmul', a, c), b)),
|
||||
(('imul(is_used_once)', ('imul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('imul', ('imul', a, c), b)),
|
||||
(('~ffma', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c', d), ('ffma', ('fmul', a, c), b, d)),
|
||||
# Prefer moving out a multiplication for more MAD/FMA-friendly code
|
||||
(('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_fmul)'), '#c'), ('fadd', ('fadd', a, c), b)),
|
||||
(('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fadd', ('fadd', a, c), b)),
|
||||
(('~fadd(is_used_once)', ('ffma(is_used_once)', 'a(is_not_const)', b, 'c(is_not_const)'), '#d'), ('fadd', ('ffma', a, b, d), c)),
|
||||
(('iadd(is_used_once)', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('iadd', ('iadd', a, c), b)),
|
||||
|
||||
# Reassociate constants in add/mul chains so they can be folded together.
|
||||
# For now, we mostly only handle cases where the constants are separated by
|
||||
# a single non-constant. We could do better eventually.
|
||||
(('~fmul', '#a', ('fmul', 'b(is_not_const)', '#c')), ('fmul', ('fmul', a, c), b)),
|
||||
(('~ffma', '#a', ('fmul', 'b(is_not_const)', '#c'), d), ('ffma', ('fmul', a, c), b, d)),
|
||||
(('imul', '#a', ('imul', 'b(is_not_const)', '#c')), ('imul', ('imul', a, c), b)),
|
||||
(('~fadd', '#a', ('fadd', 'b(is_not_const)', '#c')), ('fadd', ('fadd', a, c), b)),
|
||||
(('~fadd', '#a', ('fneg', ('fadd', 'b(is_not_const)', '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))),
|
||||
(('~fadd', '#a', ('ffma', 'b(is_not_const)', 'c(is_not_const)', '#d')), ('ffma', b, c, ('fadd', a, d))),
|
||||
(('~fadd', '#a', ('fneg', ('ffma', 'b(is_not_const)', 'c(is_not_const)', '#d'))), ('ffma', ('fneg', b), c, ('fadd', a, ('fneg', d)))),
|
||||
(('iadd', '#a', ('iadd', 'b(is_not_const)', '#c')), ('iadd', ('iadd', a, c), b)),
|
||||
(('iand', '#a', ('iand', 'b(is_not_const)', '#c')), ('iand', ('iand', a, c), b)),
|
||||
(('ior', '#a', ('ior', 'b(is_not_const)', '#c')), ('ior', ('ior', a, c), b)),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue