nir/algebraic: Fix NaN-unsafe fcsel patterns

For example, the proof for this pattern

    (('bcsel', ('flt', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, c, b)),

would be

    bcsel(a < 0, b, c)
    bcsel(!(a < 0), c, b)
    bcsel(a >= 0, c, b)
    fcsel_ge(a, c, b)

However, !(a < 0) => (a >= 0) is well known to produce different
results if `a` is NaN.

Instead of that replacement, use this replacement:

    bcsel(a < 0, b, c)
    bcsel(-0 < -a, b, c)
    bcsel(0 < -a, b, c)
    fcsel_gt(-a, b, c)

This is NaN-safe and exact.

Reviewed-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@collabora.com>
Fixes: 0f5b3c37c5 ("nir: Add opcodes for fused comp + csel and optimizations")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17048>
This commit is contained in:
Ian Romanick 2021-03-25 13:13:21 -07:00 committed by Marge Bot
parent ccd18ec4f3
commit a2a2fbc510

View file

@ -1985,9 +1985,9 @@ optimizations.extend([
(('imul24', a, 0), (0)),
(('fcsel', ('slt', 0, a), b, c), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"),
(('fcsel', ('slt', a, 0), b, c), ('fcsel_ge', a, c, b), "options->has_fused_comp_and_csel"),
(('fcsel', ('slt', a, 0), b, c), ('fcsel_gt', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),
(('fcsel', ('sge', a, 0), b, c), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"),
(('fcsel', ('sge', 0, a), b, c), ('fcsel_gt', a, c, b), "options->has_fused_comp_and_csel"),
(('fcsel', ('sge', 0, a), b, c), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),
(('bcsel', ('ilt', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, b, c), "options->has_fused_comp_and_csel"),
(('bcsel', ('ilt', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, c, b), "options->has_fused_comp_and_csel"),
@ -1995,9 +1995,9 @@ optimizations.extend([
(('bcsel', ('ige', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, c, b), "options->has_fused_comp_and_csel"),
(('bcsel', ('flt', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"),
(('bcsel', ('flt', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, c, b), "options->has_fused_comp_and_csel"),
(('bcsel', ('flt', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_gt', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),
(('bcsel', ('fge', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"),
(('bcsel', ('fge', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, c, b), "options->has_fused_comp_and_csel"),
(('bcsel', ('fge', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),
])