From 830cd2dc47648ff2c426b539afc2798be519d282 Mon Sep 17 00:00:00 2001 From: Lorenzo Rossi Date: Wed, 6 May 2026 18:18:51 +0200 Subject: [PATCH] nir/opt_algebraic: Optimize mediump fadd/fmul done in highp When we do f2fmp(fadd(f2f32(a), f2f32(b))) we can always optimize it to fadd(a, b) and obtain the same result minus an intermediate rounding step, same for fmul. I verified this on CPU using a custom script with the Berkeley SoftFloat implementation; the results there are bit-for-bit identical except for NaN representations. Signed-off-by: Lorenzo Rossi Reviewed-by: Mel Henning Reviewed-by: Ashley Smith Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index a4aed476493..259bd5b07c5 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2000,11 +2000,6 @@ optimizations.extend([ (('f2u32', ('f2fmp', 'a@32')), ('f2u32', a), 'true', TestStatus.UNSUPPORTED), (('i2f32', ('i2imp', 'a@32')), ('i2f32', a), 'true', TestStatus.UNSUPPORTED), - # f16 -> f2f32 -> fmul32 -> f2fmp when the second operand is a constant - # The optimization only works when the constant can be safely represented with 16 bits - (('f2fmp', ('fmul(is_used_once,contract)', ('f2f32', 'a@16'), '#b(is_representable_as_f16)')), ('fmul', a, ('f2fmp', b)), 'true', TestStatus.UNSUPPORTED), - (('f2fmp', ('fadd(is_used_once,contract)', ('f2f32', 'a@16'), '#b(is_representable_as_f16)')), ('fadd', a, ('f2fmp', b)), 'true', TestStatus.UNSUPPORTED), - (('ffloor', 'a(is_integral)'), a), (('fceil', 'a(is_integral)'), a), (('ftrunc', 'a(is_integral)'), a), @@ -2053,6 +2048,14 @@ optimizations.extend([ (('ult', a, 0), False), ]) +# Doing a 32-bit fadd/fmul on 16-bit operands is equivalent to a 16-bit +# fadd/fmul, contract is required for the double rounding. 
+for op in ['fadd', 'fmul']: + optimizations += [ + (('f2fmp', (f'{op}(is_used_once,contract)', ('f2f32', 'a@16'), '#b(is_representable_as_f16)')), (op, a, ('f2fmp', b)), 'true', TestStatus.UNSUPPORTED), + (('f2fmp', (f'{op}(is_used_once,contract)', ('f2f32', 'a@16'), ('f2f32', 'b@16'))), (op, a, b), 'true', TestStatus.UNSUPPORTED), + ] + for bits in [16, 32, 64]: cond = '!(options->lower_doubles_options & nir_lower_fp64_full_software)' if bits == 64 else 'true' bcsel = 'bcsel@{}'.format(bits)