From b86305bb5734fdd0511d227b646328ef03c80197 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 4 Sep 2020 01:48:35 -0400 Subject: [PATCH] nir/algebraic: collapse conversion opcodes (many patterns) mediump inserts a lot of conversions. This cleans up the IR. All other combinations are covered too. Reviewed-by: Rob Clark Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 80 ++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 9 deletions(-) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 663fb365135..f90c1df7c6b 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -673,18 +673,63 @@ for s in [16, 32, 64]: (('iadd', ('b2i{}'.format(s), ('flt', 0, 'a@{}'.format(s))), ('ineg', ('b2i{}'.format(s), ('flt', 'a@{}'.format(s), 0)))), ('f2i{}'.format(s), ('fsign', a)), '!options->lower_fsign'), ]) - # Conversions from a lower bit size to a higher bit size and back can always be removed - for h in [16, 32, 64]: - if s < h: + # float? -> float? -> floatS ==> float? -> floatS + (('~f2f{}'.format(s), ('f2f', a)), ('f2f{}'.format(s), a)), + + # int? -> float? -> floatS ==> int? -> floatS + (('~f2f{}'.format(s), ('u2f', a)), ('u2f{}'.format(s), a)), + (('~f2f{}'.format(s), ('i2f', a)), ('i2f{}'.format(s), a)), + + # float? -> float? -> intS ==> float? -> intS + (('~f2u{}'.format(s), ('f2f', a)), ('f2u{}'.format(s), a)), + (('~f2i{}'.format(s), ('f2f', a)), ('f2i{}'.format(s), a)), + + for B in [32, 64]: + if s < B: optimizations.extend([ - (('f2f{}'.format(s), ('f2f{}'.format(h), 'a@{}'.format(s))), a), - (('i2i{}'.format(s), ('i2i{}'.format(h), 'a@{}'.format(s))), a), - (('u2u{}'.format(s), ('u2u{}'.format(h), 'a@{}'.format(s))), a), - (('f2f{}'.format(s), ('b2f{}'.format(h), 'a@1')), ('b2f{}'.format(s), a)), - (('i2i{}'.format(s), ('b2i{}'.format(h), 'a@1')), ('b2i{}'.format(s), a)), - (('u2u{}'.format(s), ('b2i{}'.format(h), 'a@1')), ('b2i{}'.format(s), a)), + # S = smaller, B = bigger + # typeS -> typeB -> typeS ==> identity + (('f2f{}'.format(s), ('f2f{}'.format(B), 'a@{}'.format(s))), a), + (('i2i{}'.format(s), ('i2i{}'.format(B), 'a@{}'.format(s))), a), + (('u2u{}'.format(s), ('u2u{}'.format(B), 'a@{}'.format(s))), a), + + # bool1 -> typeB -> typeS ==> bool1 -> typeS + (('f2f{}'.format(s), ('b2f{}'.format(B), 'a@1')), ('b2f{}'.format(s), a)), + (('i2i{}'.format(s), ('b2i{}'.format(B), 'a@1')), ('b2i{}'.format(s), a)), + (('u2u{}'.format(s), ('b2i{}'.format(B), 'a@1')), ('b2i{}'.format(s), a)), + + # floatS -> floatB -> intB ==> floatS -> intB + (('f2u{}'.format(B), ('f2f{}'.format(B), 'a@{}'.format(s))), ('f2u{}'.format(B), a)), + (('f2i{}'.format(B), ('f2f{}'.format(B), 'a@{}'.format(s))), ('f2i{}'.format(B), a)), + + # int? -> floatB -> floatS ==> int? -> floatS + (('f2f{}'.format(s), ('u2f{}'.format(B), a)), ('u2f{}'.format(s), a)), + (('f2f{}'.format(s), ('i2f{}'.format(B), a)), ('i2f{}'.format(s), a)), + + # intS -> intB -> floatB ==> intS -> floatB + (('u2f{}'.format(B), ('u2u{}'.format(B), 'a@{}'.format(s))), ('u2f{}'.format(B), a)), + (('i2f{}'.format(B), ('i2i{}'.format(B), 'a@{}'.format(s))), ('i2f{}'.format(B), a)), ]) +# mediump variants of the above +optimizations.extend([ + # int32 -> float32 -> float16 ==> int32 -> float16 + (('f2fmp', ('u2f32', 'a@32')), ('u2fmp', a)), + (('f2fmp', ('i2f32', 'a@32')), ('i2fmp', a)), + + # float32 -> float16 -> int16 ==> float32 -> int16 + (('f2u16', ('f2fmp', 'a@32')), ('f2u16', a)), + (('f2i16', ('f2fmp', 'a@32')), ('f2i16', a)), + + # float32 -> int32 -> int16 ==> float32 -> int16 + (('i2imp', ('f2u32', 'a@32')), ('f2ump', a)), + (('i2imp', ('f2i32', 'a@32')), ('f2imp', a)), + + # int32 -> int16 -> float16 ==> int32 -> float16 + (('u2f16', ('i2imp', 'a@32')), ('u2f16', a)), + (('i2f16', ('i2imp', 'a@32')), ('i2f16', a)), +]) + # Integer sizes for s in [8, 16, 32, 64]: optimizations.extend([ @@ -968,15 +1013,32 @@ optimizations.extend([ (('f2fmp', ('f2f32', 'a@16')), a), (('i2imp', ('i2i32', 'a@16')), a), (('i2imp', ('u2u32', 'a@16')), a), + + (('f2imp', ('f2f32', 'a@16')), ('f2i16', a)), + (('f2ump', ('f2f32', 'a@16')), ('f2u16', a)), + (('i2fmp', ('i2i32', 'a@16')), ('i2f16', a)), + (('u2fmp', ('u2u32', 'a@16')), ('u2f16', a)), + (('f2fmp', ('b2f32', 'a@1')), ('b2f16', a)), (('i2imp', ('b2i32', 'a@1')), ('b2i16', a)), (('i2imp', ('b2i32', 'a@1')), ('b2i16', a)), + + (('f2imp', ('b2f32', 'a@1')), ('b2i16', a)), + (('f2ump', ('b2f32', 'a@1')), ('b2i16', a)), + (('i2fmp', ('b2i32', 'a@1')), ('b2f16', a)), + (('u2fmp', ('b2i32', 'a@1')), ('b2f16', a)), + # Conversions to 16 bits would be lossy so they should only be removed if # the instruction was generated by the precision lowering pass. (('f2f32', ('f2fmp', 'a@32')), a), (('i2i32', ('i2imp', 'a@32')), a), (('u2u32', ('i2imp', 'a@32')), a), + (('i2i32', ('f2imp', 'a@32')), ('f2i32', a)), + (('u2u32', ('f2ump', 'a@32')), ('f2u32', a)), + (('f2f32', ('i2fmp', 'a@32')), ('i2f32', a)), + (('f2f32', ('u2fmp', 'a@32')), ('u2f32', a)), + (('ffloor', 'a(is_integral)'), a), (('fceil', 'a(is_integral)'), a), (('ftrunc', 'a(is_integral)'), a),