From 625afb0d29d4ba37d4f72cd93fa1496cfd987e2d Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 4 Sep 2020 13:30:39 +0100 Subject: [PATCH] nir: add fcanonicalize v2(Georg Lehmann): Always remove fcanonicalize if denorms must be neither flushed nor preserved. Reviewed-by: Alyssa Rosenzweig Part-of: --- src/compiler/nir/nir_opcodes.py | 4 +++ src/compiler/nir/nir_opt_algebraic.py | 35 +++++++++++++++++-- src/compiler/nir/nir_search_helpers.h | 14 ++++++++ .../nir/nir_shader_compiler_options.h | 5 +++ 4 files changed, 56 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 83d7a4bc0c1..a34289855ce 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -216,6 +216,10 @@ def unop_numeric_convert(name, out_type, in_type, const_expr, description = ""): unop("mov", tuint, "src0") +# Flush subnormal values to zero with the same sign if required by +# the float controls execution mode. Any NaN might return a different NaN. +unop("fcanonicalize", tfloat, "src0") + unop("ineg", tint, "src0 == u_intN_min(bit_size) ? 
src0 : -src0") unop("fneg", tfloat, "-src0") unop("inot", tint, "~src0", description = "Invert every bit of the integer") diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 6c17e14365b..e93ece4d950 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -43,6 +43,10 @@ has_fmulz = '(options->has_fmulz || \ (options->has_fmulz_no_denorms && \ !nir_is_denorm_preserve(info->float_controls_execution_mode, 32)))' +denorm_ftz_16 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)' +denorm_ftz_32 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 32)' +denorm_ftz_64 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 64)' + ignore_exact = nir_algebraic.ignore_exact # Written in the form (, ) where is an expression @@ -146,6 +150,25 @@ def add_fabs_fneg(pattern, replacements, commutative = True): result.append(to_tuple(curr)) return result +optimize_fcanonicalize = [ + # Eliminate all fcanonicalize if we are not required to flush denormals. + # Technically this is inexact for the case where we don't know the denorms + # are preserved - but so is any pattern where one float opcode is replaced by + # another, because we don't know if they flush the same way. + # Constant folding would also not flush, so there is already a lot of handwaving + # involved, and this mode is supposed to be fast, not 100% reproducible. + (('fcanonicalize', 'a@16'), a, '!'+denorm_ftz_16), + (('fcanonicalize', 'a@32'), a, '!'+denorm_ftz_32), + (('fcanonicalize', 'a@64'), a, '!'+denorm_ftz_64), + + # If denormals are required to be flushed we can still + # eliminate it if any denormals are already flushed or will be flushed. + (('fcanonicalize(is_only_used_as_float)', a), a), + (('fcanonicalize', 'a(is_created_as_float)'), a), + + # Integral numbers are not denormal. 
+ (('fcanonicalize', 'a(is_integral)'), a), +] optimizations = [ # These will be recreated by late_algebraic if supported. @@ -565,6 +588,8 @@ optimizations.extend([ (('imul', ('ishl', a, '#b'), '#c'), ('imul', a, ('ishl', c, b))), ]) +optimizations.extend(optimize_fcanonicalize) + # Care must be taken here. Shifts in NIR uses only the lower log2(bitsize) # bits of the second source. These replacements must correctly handle the # case where (b % bitsize) + (c % bitsize) >= bitsize. @@ -3434,12 +3459,12 @@ for op in ['fpow']: (('bcsel', a, (op, b, c), (op + '(is_used_once)', d, c)), (op, ('bcsel', a, b, d), c)), ] -for op in ['frcp', 'frsq', 'fsqrt', 'fexp2', 'flog2', 'fsign', 'fsin', 'fcos', 'fsin_amd', 'fcos_amd', 'fsin_mdg', 'fcos_mdg', 'fsin_agx', 'fneg', 'fabs', 'fsign']: +for op in ['frcp', 'frsq', 'fsqrt', 'fexp2', 'flog2', 'fsign', 'fsin', 'fcos', 'fsin_amd', 'fcos_amd', 'fsin_mdg', 'fcos_mdg', 'fsin_agx', 'fneg', 'fabs', 'fsign', 'fcanonicalize']: optimizations += [ (('bcsel', c, (op + '(is_used_once)', a), (op + '(is_used_once)', b)), (op, ('bcsel', c, a, b))), ] -for op in ['ineg', 'iabs', 'inot', 'isign']: +for op in ['ineg', 'iabs', 'inot', 'isign', 'fcanonicalize']: optimizations += [ ((op, ('bcsel', c, '#a', '#b')), ('bcsel', c, (op, a), (op, b))), ] @@ -4004,6 +4029,12 @@ late_optimizations.extend([ (('bitz', ('inot', a), b), ('bitnz', a, b)), ]) +late_optimizations += [ + # If we can't eliminate it, lower it so that backends don't have to deal with + # it. 
+ (('fcanonicalize', a), ('fmul', a, 1.0), '!options->has_fcanonicalize'), +] + # A few more extract cases we'd rather leave late for N in [16, 32]: aN = 'a@{0}'.format(N) diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index 6f24d64ebbc..0307a02a30c 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -1065,4 +1065,18 @@ cannot_add_output_modifier(const nir_search_state *state, return !can_add_output_modifier(state, instr, src, num_components, swizzle); } + +static inline bool +is_created_as_float(const nir_search_state *state, const nir_alu_instr *instr, unsigned src, + UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) +{ + nir_alu_instr *src_alu = nir_src_as_alu_instr(instr->src[src].src); + + if (src_alu == NULL) + return false; + + nir_alu_type output_type = nir_op_infos[src_alu->op].output_type; + return nir_alu_type_get_base_type(output_type) == nir_type_float; +} + #endif /* _NIR_SEARCH_ */ diff --git a/src/compiler/nir/nir_shader_compiler_options.h b/src/compiler/nir/nir_shader_compiler_options.h index 790148485e3..36565059ba6 100644 --- a/src/compiler/nir/nir_shader_compiler_options.h +++ b/src/compiler/nir/nir_shader_compiler_options.h @@ -649,6 +649,11 @@ typedef struct nir_shader_compiler_options { */ bool has_fmulz_no_denorms; + /** Backend supports fcanonicalize, if not set fcanonicalize will be lowered + * to fmul(a, 1.0) + */ + bool has_fcanonicalize; + /** Backend supports 32bit ufind_msb_rev and ifind_msb_rev. */ bool has_find_msb_rev;