nir/algebraic: Basic patterns for dot_4x8

v2: Add and modify patterns to let constant folding do better.

v3: Remove '(is_not_zero)' from the patterns that try to combine
addends.  I honestly don't know why I had it there in the first place,
and nothing in my deep git logs could help clue me in.  Noticed by
Alyssa.  Remover patterns that detect open-coded udot_4x8.  Suggested by
Alyssa and Jason.  Add missing sudot_4x8 patterns.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12142>
This commit is contained in:
Ian Romanick 2021-02-23 19:12:49 -08:00 committed by Marge Bot
parent 6c18a3b497
commit 806cd2341c

View file

@ -192,6 +192,35 @@ optimizations = [
# flrp(a, a + b, c) => a + flrp(0, b, c) => a + (b * c)
(('~flrp', a, ('fadd(is_used_once)', a, b), c), ('fadd', ('fmul', b, c), a)),
(('sdot_4x8_iadd', a, 0, b), b),
(('udot_4x8_uadd', a, 0, b), b),
(('sdot_4x8_iadd_sat', a, 0, b), b),
(('udot_4x8_uadd_sat', a, 0, b), b),
# sudot_4x8_iadd is not commutative at all, so the patterns must be
# duplicated with zeros on each of the first positions.
(('sudot_4x8_iadd', a, 0, b), b),
(('sudot_4x8_iadd', 0, a, b), b),
(('sudot_4x8_iadd_sat', a, 0, b), b),
(('sudot_4x8_iadd_sat', 0, a, b), b),
(('iadd', ('sdot_4x8_iadd(is_used_once)', a, b, '#c'), '#d'), ('sdot_4x8_iadd', a, b, ('iadd', c, d))),
(('iadd', ('udot_4x8_uadd(is_used_once)', a, b, '#c'), '#d'), ('udot_4x8_uadd', a, b, ('iadd', c, d))),
(('iadd', ('sudot_4x8_iadd(is_used_once)', a, b, '#c'), '#d'), ('sudot_4x8_iadd', a, b, ('iadd', c, d))),
# Try to let constant folding eliminate the dot-product part. These are
# safe because the dot product cannot overflow 32 bits.
(('iadd', ('sdot_4x8_iadd', 'a(is_not_const)', b, 0), c), ('sdot_4x8_iadd', a, b, c)),
(('iadd', ('udot_4x8_uadd', 'a(is_not_const)', b, 0), c), ('udot_4x8_uadd', a, b, c)),
(('iadd', ('sudot_4x8_iadd', 'a(is_not_const)', b, 0), c), ('sudot_4x8_iadd', a, b, c)),
(('iadd', ('sudot_4x8_iadd', a, 'b(is_not_const)', 0), c), ('sudot_4x8_iadd', a, b, c)),
(('sdot_4x8_iadd', '#a', '#b', 'c(is_not_const)'), ('iadd', ('sdot_4x8_iadd', a, b, 0), c)),
(('udot_4x8_uadd', '#a', '#b', 'c(is_not_const)'), ('iadd', ('udot_4x8_uadd', a, b, 0), c)),
(('sudot_4x8_iadd', '#a', '#b', 'c(is_not_const)'), ('iadd', ('sudot_4x8_iadd', a, b, 0), c)),
(('sdot_4x8_iadd_sat', '#a', '#b', 'c(is_not_const)'), ('iadd_sat', ('sdot_4x8_iadd', a, b, 0), c), '!options->lower_add_sat'),
(('udot_4x8_uadd_sat', '#a', '#b', 'c(is_not_const)'), ('uadd_sat', ('udot_4x8_uadd', a, b, 0), c), '!options->lower_add_sat'),
(('sudot_4x8_iadd_sat', '#a', '#b', 'c(is_not_const)'), ('iadd_sat', ('sudot_4x8_iadd', a, b, 0), c), '!options->lower_add_sat'),
]
# Float sizes