mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-31 13:30:42 +01:00
nir/opt_algebraic: move fsat last for fsqrt(fsat(a))
This should be exact, even for all special values:
  fsqrt(NaN) -> NaN
  fsqrt(-0.0) -> 0.0
  fsqrt(-Inf) -> NaN
  fsqrt(negative finite) -> NaN
So all of these get saturated to +0.0.
All numbers >= 1.0 will have a square root >= 1.0, which will be saturated to 1.0.
Moving the fsat guarantees that it can use an output modifier for hardware that has those, and shouldn't harm other hardware either.
Foz-DB Navi21:
Totals from 255 (0.31% of 82151) affected shaders:
  Instrs: 664906 -> 664194 (-0.11%)
  CodeSize: 3623500 -> 3619188 (-0.12%)
  Latency: 11336397 -> 11335688 (-0.01%); split: -0.01%, +0.00%
  InvThroughput: 2716430 -> 2715726 (-0.03%); split: -0.03%, +0.00%
  VALU: 442603 -> 441891 (-0.16%)
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39202>
This commit is contained in:
parent
6bd5c037f2
commit
93d05cdfd8
2 changed files with 31 additions and 0 deletions
|
|
@ -3834,6 +3834,8 @@ late_optimizations.extend([
|
|||
|
||||
(('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))),
|
||||
|
||||
(('fsqrt', ('fsat(is_used_once)', 'a(cannot_add_output_modifier)')), ('fsat', ('fsqrt', a))),
|
||||
|
||||
(('fdot2', a, b), ('fdot2_replicated', a, b), 'options->fdot_replicates'),
|
||||
(('fdot3', a, b), ('fdot3_replicated', a, b), 'options->fdot_replicates'),
|
||||
(('fdot4', a, b), ('fdot4_replicated', a, b), 'options->fdot_replicates'),
|
||||
|
|
|
|||
|
|
@ -1036,4 +1036,33 @@ w_is_zero(const nir_search_state *state, const nir_alu_instr *instr, unsigned sr
|
|||
return compare_component(instr, src, swizzle[3], 0.0);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
can_add_output_modifier(const nir_search_state *state,
|
||||
const nir_alu_instr *instr,
|
||||
unsigned src,
|
||||
UNUSED unsigned num_components,
|
||||
UNUSED const uint8_t *swizzle)
|
||||
{
|
||||
nir_alu_instr *src_alu = nir_src_as_alu(instr->src[src].src);
|
||||
|
||||
if (src_alu == NULL)
|
||||
return false;
|
||||
|
||||
if (!list_is_singular(&src_alu->def.uses))
|
||||
return false;
|
||||
|
||||
nir_alu_type output_type = nir_op_infos[src_alu->op].output_type;
|
||||
return nir_alu_type_get_base_type(output_type) == nir_type_float;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
cannot_add_output_modifier(const nir_search_state *state,
|
||||
const nir_alu_instr *instr,
|
||||
unsigned src,
|
||||
unsigned num_components,
|
||||
const uint8_t *swizzle)
|
||||
{
|
||||
return !can_add_output_modifier(state, instr, src, num_components, swizzle);
|
||||
}
|
||||
|
||||
#endif /* _NIR_SEARCH_ */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue