mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-15 04:10:35 +01:00
nir: add fcanonicalize
v2(Georg Lehmann): Always remove fcanonicalize if denorms must be neither flushed nor preserved. Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39180>
This commit is contained in:
parent
43d998df84
commit
625afb0d29
4 changed files with 56 additions and 2 deletions
|
|
@ -216,6 +216,10 @@ def unop_numeric_convert(name, out_type, in_type, const_expr, description = ""):
|
|||
|
||||
unop("mov", tuint, "src0")
|
||||
|
||||
# Flush subnormal values to zero with the same sign if required by
|
||||
# the float controls execution mode. Any NaN might return a different NaN.
|
||||
unop("fcanonicalize", tfloat, "src0")
|
||||
|
||||
unop("ineg", tint, "src0 == u_intN_min(bit_size) ? src0 : -src0")
|
||||
unop("fneg", tfloat, "-src0")
|
||||
unop("inot", tint, "~src0", description = "Invert every bit of the integer")
|
||||
|
|
|
|||
|
|
@ -43,6 +43,10 @@ has_fmulz = '(options->has_fmulz || \
|
|||
(options->has_fmulz_no_denorms && \
|
||||
!nir_is_denorm_preserve(info->float_controls_execution_mode, 32)))'
|
||||
|
||||
denorm_ftz_16 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'
|
||||
denorm_ftz_32 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 32)'
|
||||
denorm_ftz_64 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 64)'
|
||||
|
||||
ignore_exact = nir_algebraic.ignore_exact
|
||||
|
||||
# Written in the form (<search>, <replace>) where <search> is an expression
|
||||
|
|
@ -146,6 +150,25 @@ def add_fabs_fneg(pattern, replacements, commutative = True):
|
|||
result.append(to_tuple(curr))
|
||||
return result
|
||||
|
||||
# Rules that eliminate fcanonicalize.
optimize_fcanonicalize = [
    # Eliminate all fcanonicalize if we are not required to flush denormals.
    # Technically this is inexact for the case where we don't know whether the
    # denorms are preserved - but so is any pattern where one float opcode is
    # replaced by another, because we don't know if they flush the same way.
    # Constant folding would also not flush, so there is already a lot of
    # handwaving involved, and this mode is supposed to be fast, not 100%
    # reproducible.
    (('fcanonicalize', 'a@16'), a, '!'+denorm_ftz_16),
    (('fcanonicalize', 'a@32'), a, '!'+denorm_ftz_32),
    (('fcanonicalize', 'a@64'), a, '!'+denorm_ftz_64),

    # Even if denormals are required to be flushed, fcanonicalize can still be
    # eliminated when any denormals are already flushed or will be flushed.
    (('fcanonicalize(is_only_used_as_float)', a), a),
    (('fcanonicalize', 'a(is_created_as_float)'), a, 'true', TestStatus.UNSUPPORTED),

    # Integral numbers are never denormal.
    (('fcanonicalize', 'a(is_integral)'), a),
]
|
||||
|
||||
optimizations = [
|
||||
# These will be recreated by late_algebraic if supported.
|
||||
|
|
@ -565,6 +588,8 @@ optimizations.extend([
|
|||
(('imul', ('ishl', a, '#b'), '#c'), ('imul', a, ('ishl', c, b))),
|
||||
])
|
||||
|
||||
optimizations.extend(optimize_fcanonicalize)
|
||||
|
||||
# Care must be taken here. Shifts in NIR uses only the lower log2(bitsize)
|
||||
# bits of the second source. These replacements must correctly handle the
|
||||
# case where (b % bitsize) + (c % bitsize) >= bitsize.
|
||||
|
|
@ -3434,12 +3459,12 @@ for op in ['fpow']:
|
|||
(('bcsel', a, (op, b, c), (op + '(is_used_once)', d, c)), (op, ('bcsel', a, b, d), c)),
|
||||
]
|
||||
|
||||
for op in ['frcp', 'frsq', 'fsqrt', 'fexp2', 'flog2', 'fsign', 'fsin', 'fcos', 'fsin_amd', 'fcos_amd', 'fsin_mdg', 'fcos_mdg', 'fsin_agx', 'fneg', 'fabs', 'fsign']:
|
||||
for op in ['frcp', 'frsq', 'fsqrt', 'fexp2', 'flog2', 'fsign', 'fsin', 'fcos', 'fsin_amd', 'fcos_amd', 'fsin_mdg', 'fcos_mdg', 'fsin_agx', 'fneg', 'fabs', 'fsign', 'fcanonicalize']:
|
||||
optimizations += [
|
||||
(('bcsel', c, (op + '(is_used_once)', a), (op + '(is_used_once)', b)), (op, ('bcsel', c, a, b))),
|
||||
]
|
||||
|
||||
for op in ['ineg', 'iabs', 'inot', 'isign']:
|
||||
for op in ['ineg', 'iabs', 'inot', 'isign', 'fcanonicalize']:
|
||||
optimizations += [
|
||||
((op, ('bcsel', c, '#a', '#b')), ('bcsel', c, (op, a), (op, b))),
|
||||
]
|
||||
|
|
@ -4004,6 +4029,12 @@ late_optimizations.extend([
|
|||
(('bitz', ('inot', a), b), ('bitnz', a, b)),
|
||||
])
|
||||
|
||||
late_optimizations += [
|
||||
# If we can't eliminate it, lower it so that backends don't have to deal with
|
||||
# it.
|
||||
(('fcanonicalize', a), ('fmul', a, 1.0), '!options->has_fcanonicalize'),
|
||||
]
|
||||
|
||||
# A few more extract cases we'd rather leave late
|
||||
for N in [16, 32]:
|
||||
aN = 'a@{0}'.format(N)
|
||||
|
|
|
|||
|
|
@ -1065,4 +1065,18 @@ cannot_add_output_modifier(const nir_search_state *state,
|
|||
return !can_add_output_modifier(state, instr, src, num_components, swizzle);
|
||||
}
|
||||
|
||||
|
||||
static inline bool
|
||||
is_created_as_float(const nir_search_state *state, const nir_alu_instr *instr, unsigned src,
|
||||
UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
|
||||
{
|
||||
nir_alu_instr *src_alu = nir_src_as_alu(instr->src[src].src);
|
||||
|
||||
if (src_alu == NULL)
|
||||
return false;
|
||||
|
||||
nir_alu_type output_type = nir_op_infos[src_alu->op].output_type;
|
||||
return nir_alu_type_get_base_type(output_type) == nir_type_float;
|
||||
}
|
||||
|
||||
#endif /* _NIR_SEARCH_ */
|
||||
|
|
|
|||
|
|
@ -649,6 +649,11 @@ typedef struct nir_shader_compiler_options {
|
|||
*/
|
||||
bool has_fmulz_no_denorms;
|
||||
|
||||
/** Backend supports fcanonicalize. If not set, fcanonicalize will be lowered
|
||||
* to fmul(a, 1.0)
|
||||
*/
|
||||
bool has_fcanonicalize;
|
||||
|
||||
/** Backend supports 32bit ufind_msb_rev and ifind_msb_rev. */
|
||||
bool has_find_msb_rev;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue