nir/opt_algebraic: move fsat last for fsqrt(fsat(a))

This should be exact, even for all special values:

fsqrt(NaN) -> NaN
fsqrt(-0.0) -> 0.0
fsqrt(-Inf) -> NaN
fsqrt(negative finite) -> NaN

So all of these get saturated to +0.0

All numbers >= 1.0 will have a square root >= 1.0,
which will be saturated to 1.0.

Moving the fsat guarantees that it can use an output modifier
for hardware that has those, and shouldn't harm other hardware either.

Foz-DB Navi21:
Totals from 255 (0.31% of 82151) affected shaders:
Instrs: 664906 -> 664194 (-0.11%)
CodeSize: 3623500 -> 3619188 (-0.12%)
Latency: 11336397 -> 11335688 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 2716430 -> 2715726 (-0.03%); split: -0.03%, +0.00%
VALU: 442603 -> 441891 (-0.16%)

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39202>
This commit is contained in:
Georg Lehmann 2026-01-07 23:35:02 +01:00 committed by Marge Bot
parent 6bd5c037f2
commit 93d05cdfd8
2 changed files with 31 additions and 0 deletions

View file

@ -3834,6 +3834,8 @@ late_optimizations.extend([
(('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))),
(('fsqrt', ('fsat(is_used_once)', 'a(cannot_add_output_modifier)')), ('fsat', ('fsqrt', a))),
(('fdot2', a, b), ('fdot2_replicated', a, b), 'options->fdot_replicates'),
(('fdot3', a, b), ('fdot3_replicated', a, b), 'options->fdot_replicates'),
(('fdot4', a, b), ('fdot4_replicated', a, b), 'options->fdot_replicates'),

View file

@ -1036,4 +1036,33 @@ w_is_zero(const nir_search_state *state, const nir_alu_instr *instr, unsigned sr
return compare_component(instr, src, swizzle[3], 0.0);
}
/**
 * Search-helper predicate on source \p src of \p instr.
 *
 * Returns true only when all of the following hold:
 *  - the source value is produced by an ALU instruction,
 *  - that instruction's result has exactly one use,
 *  - the producing opcode's output base type is float.
 *
 * \p state, \p num_components and \p swizzle are not consulted.
 *
 * NOTE(review): going by the name, this is a heuristic for "the producer
 * could take an output modifier (e.g. saturate)"; whether a backend really
 * supports that is not checked here.
 */
static inline bool
can_add_output_modifier(const nir_search_state *state,
                        const nir_alu_instr *instr,
                        unsigned src,
                        UNUSED unsigned num_components,
                        UNUSED const uint8_t *swizzle)
{
   nir_alu_instr *producer = nir_src_as_alu(instr->src[src].src);

   /* Reject non-ALU producers and results that have more than one use. */
   if (producer == NULL || !list_is_singular(&producer->def.uses))
      return false;

   /* Only float-producing opcodes qualify. */
   const nir_alu_type base_type =
      nir_alu_type_get_base_type(nir_op_infos[producer->op].output_type);

   return base_type == nir_type_float;
}
/**
 * Logical negation of can_add_output_modifier(), provided so the inverse
 * condition can be used as a named search predicate.
 *
 * All arguments are forwarded unchanged.
 */
static inline bool
cannot_add_output_modifier(const nir_search_state *state,
                           const nir_alu_instr *instr,
                           unsigned src,
                           unsigned num_components,
                           const uint8_t *swizzle)
{
   const bool can_add =
      can_add_output_modifier(state, instr, src, num_components, swizzle);

   return !can_add;
}
#endif /* _NIR_SEARCH_ */