mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 07:28:11 +02:00
nir/lower_alu: optimize min/max signed zeros
We don't usually need a multi-instruction lowering. With the agx change in the next commit, honeykrisp results:

Totals from 3589 (6.64% of 54019) affected shaders:
MaxWaves: 3598144 -> 3598400 (+0.01%); split: +0.02%, -0.01%
Instrs: 1445830 -> 1332394 (-7.85%)
CodeSize: 10696356 -> 9742130 (-8.92%)
Fills: 721 -> 723 (+0.28%); split: -0.14%, +0.42%
Scratch: 3980 -> 3968 (-0.30%)
ALU: 1156426 -> 1043198 (-9.79%)
FSCIB: 1156426 -> 1043196 (-9.79%)
IC: 267202 -> 267166 (-0.01%)
GPRs: 208765 -> 208712 (-0.03%); split: -0.16%, +0.14%
Uniforms: 683643 -> 683677 (+0.00%); split: -0.01%, +0.01%
Preamble instrs: 1163325 -> 1159314 (-0.34%)

Control results alone:

Totals:
Instrs: 110168 -> 107171 (-2.72%)

Totals from 71 (22.26% of 319) affected shaders:
Instrs: 48895 -> 45898 (-6.13%)

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Marek Olšák <maraeo@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35989>
This commit is contained in:
parent
042adf3cc5
commit
fc95397957
1 changed files with 33 additions and 2 deletions
|
|
@@ -202,7 +202,6 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *instr, UNUSED void *cb_data)
|
|||
nir_def *s1 = nir_ssa_for_alu_src(b, instr, 1);
|
||||
|
||||
bool max = instr->op == nir_op_fmax;
|
||||
nir_def *iminmax = max ? nir_imax(b, s0, s1) : nir_imin(b, s0, s1);
|
||||
|
||||
/* Lower the fmin/fmax to a no_signed_zero fmin/fmax. This ensures that
|
||||
* nir_lower_alu is idempotent, and allows the backend to implement
|
||||
|
|
@@ -212,7 +211,39 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *instr, UNUSED void *cb_data)
|
|||
nir_def *fminmax = max ? nir_fmax(b, s0, s1) : nir_fmin(b, s0, s1);
|
||||
b->fp_fast_math = instr->fp_fast_math;
|
||||
|
||||
lowered = nir_bcsel(b, nir_feq(b, s0, s1), iminmax, fminmax);
|
||||
/* If we have a constant source, we can usually optimize */
|
||||
if (s0->num_components == 1 && s0->bit_size == 32) {
|
||||
for (unsigned i = 0; i < 2 && lowered == NULL; ++i) {
|
||||
if (!nir_src_is_const(instr->src[i].src))
|
||||
continue;
|
||||
|
||||
uint32_t x = nir_alu_src_as_uint(instr->src[i]);
|
||||
bool pos_zero = x == fui(+0.0);
|
||||
bool neg_zero = x == fui(-0.0);
|
||||
nir_def *zero = i == 0 ? s0 : s1;
|
||||
nir_def *other = i == 0 ? s1 : s0;
|
||||
|
||||
if (!pos_zero && !neg_zero) {
|
||||
/* The lowering is only required when both sources are zero, so
|
||||
* if we have a nonzero constant source, skip the lowering.
|
||||
*/
|
||||
lowered = fminmax;
|
||||
} else if (pos_zero && max) {
|
||||
/* max(x, +0.0) = +0.0 < x ? x : +0.0 */
|
||||
lowered = nir_bcsel(b, nir_flt(b, zero, other), other, zero);
|
||||
} else if (neg_zero && !max) {
|
||||
/* min(x, -0.0) = x < -0.0 ? x : -0.0 */
|
||||
lowered = nir_bcsel(b, nir_flt(b, other, zero), other, zero);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Fallback on the emulation */
|
||||
if (!lowered) {
|
||||
nir_def *iminmax = max ? nir_imax(b, s0, s1) : nir_imin(b, s0, s1);
|
||||
lowered = nir_bcsel(b, nir_feq(b, s0, s1), iminmax, fminmax);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue