mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 22:49:13 +02:00
nir: push down fabs for atan
This is worse in terms of NIR instruction count, but it lets the fabs fold more easily (on AGX, which has fabs on comparisons and fmul but not on bcsel; it should be no worse if the ISA has fabs on all three).
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30934>
This commit is contained in:
parent
8579375777
commit
0a4a0df283
1 changed files with 9 additions and 7 deletions
|
|
@@ -170,12 +170,14 @@ nir_atan(nir_builder *b, nir_def *y_over_x)
|
|||
/*
|
||||
* range-reduction, first step:
|
||||
*
|
||||
* / |y_over_x| if |y_over_x| <= 1.0;
|
||||
* x = <
|
||||
* \ 1.0 / |y_over_x| otherwise
|
||||
* / y_over_x if |y_over_x| <= 1.0;
|
||||
* u = <
|
||||
* \ 1.0 / y_over_x otherwise
|
||||
*
|
||||
* x = |u| for the corrected sign.
|
||||
*/
|
||||
nir_def *x = nir_bcsel(b, nir_fle_imm(b, abs_y_over_x, 1.0),
|
||||
abs_y_over_x, nir_frcp(b, abs_y_over_x));
|
||||
nir_def *u = nir_bcsel(b, nir_fle_imm(b, abs_y_over_x, 1.0),
|
||||
y_over_x, nir_frcp(b, y_over_x));
|
||||
|
||||
/*
|
||||
* approximate atan by evaluating polynomial using Horner's method:
|
||||
|
|
@@ -190,14 +192,14 @@ nir_atan(nir_builder *b, nir_def *y_over_x)
|
|||
-0.3326756418091246f, 0.9999793128310355f
|
||||
};
|
||||
|
||||
nir_def *x_2 = nir_fmul(b, x, x);
|
||||
nir_def *x_2 = nir_fmul(b, u, u);
|
||||
nir_def *res = nir_imm_floatN_t(b, coeffs[0], bit_size);
|
||||
|
||||
for (unsigned i = 1; i < ARRAY_SIZE(coeffs); ++i) {
|
||||
res = nir_ffma_imm2(b, res, x_2, coeffs[i]);
|
||||
}
|
||||
|
||||
nir_def *tmp = nir_fmul(b, x, res);
|
||||
nir_def *tmp = nir_fmul(b, nir_fabs(b, u), res);
|
||||
|
||||
/* range-reduction fixup */
|
||||
tmp = nir_ffma(b,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue