diff --git a/src/intel/compiler/brw_nir_lower_fsign.py b/src/intel/compiler/brw_nir_lower_fsign.py index 6821b3d98f5..04c6a2bde68 100644 --- a/src/intel/compiler/brw_nir_lower_fsign.py +++ b/src/intel/compiler/brw_nir_lower_fsign.py @@ -16,6 +16,16 @@ lower_fsign = [ # is_finite. # # NOTE: fcsel opcodes are currently limited to float32 in NIR. + (('fmul@32(is_only_used_as_float)', ('fsign(is_used_once)', 'a(is_not_negative)'), b), ('fcsel_gt', a , b , ('fmul', b, 0.0 ))), + (('~fmul@32', ('fsign(is_used_once)', 'a(is_not_negative)'), b), ('fcsel_gt', a , b , 0.0 )), + (('fmul@32(is_only_used_as_float)', ('fsign(is_used_once)', 'a(is_not_positive)'), b), ('fcsel_gt', ('fneg', a), ('fneg', b), ('fmul', b, 0x80000000))), + (('~fmul@32', ('fsign(is_used_once)', 'a(is_not_positive)'), b), ('fcsel_gt', ('fneg', a), ('fneg', b), 0x80000000 )), + + (('fmul@16(is_only_used_as_float)', ('fsign(is_used_once)', 'a(is_not_negative)'), b), ('bcsel', ('!flt', 0, a ), b , ('fmul', b, 0.0 ))), + (('~fmul@16', ('fsign(is_used_once)', 'a(is_not_negative)'), b), ('bcsel', ('!flt', 0, a ), b , 0.0 )), + (('fmul@16(is_only_used_as_float)', ('fsign(is_used_once)', 'a(is_not_positive)'), b), ('bcsel', ('!flt', 0, ('fneg', a)), ('fneg', b), ('fmul', b, 0x8000))), + (('~fmul@16', ('fsign(is_used_once)', 'a(is_not_positive)'), b), ('bcsel', ('!flt', 0, ('fneg', a)), ('fneg', b), 0x8000 )), + (('fmul@32(is_only_used_as_float,nsz)', ('fsign(is_used_once)', a), 'b(is_finite)'), ('fcsel_gt', a, b, ('fcsel_gt', ('fneg', a), ('fneg', b), 0.0))), (('fmul@32(is_only_used_as_float,nsz,nnan)', ('fsign(is_used_once)', a), b ), ('fcsel_gt', a, b, ('fcsel_gt', ('fneg', a), ('fneg', b), 0.0))), (('~fmul@32', ('fsign(is_used_once)', a), b ), ('fcsel_gt', a, b, ('fcsel_gt', ('fneg', a), ('fneg', b), 0.0))), @@ -25,6 +35,12 @@ lower_fsign = [ # only slight deviation is that it can provide -0 for some NaN inputs. 
(('fsign@32', a), ('fcsel_gt', ('fabs', a) , ('ior', ('iand', a, 0x80000000), 0x3f800000), ('iand', a, 0x80000000))), (('fsign@16', a), ('bcsel', ('!flt', 0, ('fabs', a)), ('ior', ('iand', a, 0x8000 ), 0x3c00 ), ('iand', a, 0x8000 ))), + + # a*±0.0 is always a zero, whose sign depends on 'a', unless 'a' is infinite or NaN (then the result is NaN) + (('fmul(nsz,nnan)', 'a', 0.0), 0.0), + (('fmul(nsz)', 'a(is_finite)', 0.0), 0.0), + (('fmul(nsz,nnan)', 'a@32', 0x80000000), 0.0), + (('fmul(nsz,nnan)', 'a@16', 0x8000 ), 0.0), ] def main():