From 93d05cdfd801e52027cf1115169ec314874070df Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Wed, 7 Jan 2026 23:35:02 +0100 Subject: [PATCH] nir/opt_algebraic: move fsat last for fsqrt(fsat(a)) This should be exact, even for all special values: fsqrt(NaN) -> NaN fsqrt(-0.0) -> 0.0 fsqrt(-Inf) -> NaN fsqrt(negative finite) -> NaN So all of these get saturated to +0.0 All numbers >= 1.0 will have a square root >= 1.0, which will be saturated to 1.0 Moving the fsat guarantees that it can use an output modifier for hardware that has those, and shouldn't harm other hardware either. Foz-DB Navi21: Totals from 255 (0.31% of 82151) affected shaders: Instrs: 664906 -> 664194 (-0.11%) CodeSize: 3623500 -> 3619188 (-0.12%) Latency: 11336397 -> 11335688 (-0.01%); split: -0.01%, +0.00% InvThroughput: 2716430 -> 2715726 (-0.03%); split: -0.03%, +0.00% VALU: 442603 -> 441891 (-0.16%) Reviewed-by: Alyssa Rosenzweig Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 2 ++ src/compiler/nir/nir_search_helpers.h | 29 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 5918682c09b..fad48dc2f3d 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -3834,6 +3834,8 @@ late_optimizations.extend([ (('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))), + (('fsqrt', ('fsat(is_used_once)', 'a(cannot_add_output_modifier)')), ('fsat', ('fsqrt', a))), + (('fdot2', a, b), ('fdot2_replicated', a, b), 'options->fdot_replicates'), (('fdot3', a, b), ('fdot3_replicated', a, b), 'options->fdot_replicates'), (('fdot4', a, b), ('fdot4_replicated', a, b), 'options->fdot_replicates'), diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index f73ac1a0c69..3845a0443dc 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ 
b/src/compiler/nir/nir_search_helpers.h @@ -1036,4 +1036,33 @@ w_is_zero(const nir_search_state *state, const nir_alu_instr *instr, unsigned sr return compare_component(instr, src, swizzle[3], 0.0); } +/* Search predicate on instr->src[src]: returns true only when that source resolves to an ALU instruction through nir_src_as_alu(), that instruction's def.uses list is singular, and the base type of its opcode's output_type is nir_type_float. state, num_components and swizzle are not consulted. NOTE(review): the name suggests such a producer could take an output modifier (e.g. a folded fsat) -- confirm against the backends that rely on this. */ static inline bool +can_add_output_modifier(const nir_search_state *state, + const nir_alu_instr *instr, + unsigned src, + UNUSED unsigned num_components, + UNUSED const uint8_t *swizzle) +{ + nir_alu_instr *src_alu = nir_src_as_alu(instr->src[src].src); + + if (src_alu == NULL) /* presumably: the source is not produced by an ALU instruction */ + return false; + + if (!list_is_singular(&src_alu->def.uses)) /* presumably: the producer's result has other users besides instr */ + return false; + + nir_alu_type output_type = nir_op_infos[src_alu->op].output_type; + return nir_alu_type_get_base_type(output_type) == nir_type_float; +} +/* Logical negation of can_add_output_modifier(), forwarding every argument unchanged, so the test can be written as a positive condition on a search variable. */ +static inline bool +cannot_add_output_modifier(const nir_search_state *state, + const nir_alu_instr *instr, + unsigned src, + unsigned num_components, + const uint8_t *swizzle) +{ + return !can_add_output_modifier(state, instr, src, num_components, swizzle); +} + #endif /* _NIR_SEARCH_ */