nir/algebraic: optimize iand/ior of (n)eq zero when umax/umin not available

Before 8e1b75b330 ("nir/algebraic: optimize iand/ior of (n)eq zero") this
optimization didn't need the use of umax/umin. VC4 HW supports only signed
integer max/min operations.

lower_umin and lower_umax are added to allow enabling previous optimizations
behaviour for this cases.

Fixes: 8e1b75b330 ("nir/algebraic: optimize iand/ior of (n)eq zero")
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7083>
(cherry picked from commit e7127b3468)
This commit is contained in:
Jose Maria Casanova Crespo 2020-10-09 18:33:26 +02:00 committed by Dylan Baker
parent f8953b4d81
commit a66268d3f4
3 changed files with 13 additions and 5 deletions

View file

@ -5917,7 +5917,7 @@
"description": "nir/algebraic: optimize iand/ior of (n)eq zero when umax/umin not available",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"master_sha": null,
"because_sha": "8e1b75b330954a2e40e8ed0c04871e43120d4f4a"
},

View file

@ -3017,6 +3017,12 @@ typedef struct nir_shader_compiler_options {
/** enables rules to lower fsign to fsub and flt */
bool lower_fsign;
/** enable rules that avoid generating umax from signed integer ops */
bool lower_umax;
/** enable rules that avoid generating umin from signed integer ops */
bool lower_umin;
/* lower fdph to fdot4 */
bool lower_fdph;

View file

@ -652,10 +652,12 @@ optimizations.extend([
(('ine', ('ineg', ('b2i32', 'a@1')), ('ineg', ('b2i32', 'b@1'))), ('ine', a, b)),
(('b2i32', ('ine', 'a@1', 'b@1')), ('b2i32', ('ixor', a, b))),
(('iand', ('ieq', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('umax', a, b), 0)),
(('ior', ('ieq', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('umin', a, b), 0)),
(('iand', ('ine', 'a@32', 0), ('ine', 'b@32', 0)), ('ine', ('umin', a, b), 0)),
(('ior', ('ine', 'a@32', 0), ('ine', 'b@32', 0)), ('ine', ('umax', a, b), 0)),
(('iand', ('ieq', 'a@{}'.format(s), 0), ('ieq', 'b@{}'.format(s), 0)), ('ieq', ('ior', a, b), 0), 'options->lower_umax'),
(('ior', ('ine', 'a@{}'.format(s), 0), ('ine', 'b@{}'.format(s), 0)), ('ine', ('ior', a, b), 0), 'options->lower_umin'),
(('iand', ('ieq', 'a@{}'.format(s), 0), ('ieq', 'b@{}'.format(s), 0)), ('ieq', ('umax', a, b), 0), '!options->lower_umax'),
(('ior', ('ieq', 'a@{}'.format(s), 0), ('ieq', 'b@{}'.format(s), 0)), ('ieq', ('umin', a, b), 0), '!options->lower_umin'),
(('iand', ('ine', 'a@{}'.format(s), 0), ('ine', 'b@{}'.format(s), 0)), ('ine', ('umin', a, b), 0), '!options->lower_umin'),
(('ior', ('ine', 'a@{}'.format(s), 0), ('ine', 'b@{}'.format(s), 0)), ('ine', ('umax', a, b), 0), '!options->lower_umax'),
# This pattern occurs coutresy of __flt64_nonnan in the soft-fp64 code.
# The first part of the iand comes from the !__feq64_nonnan.