nir/lower_alu: optimize min/max signed zeros

When one of the sources is a constant, we usually don't need the
multi-instruction lowering.

with the agx change in the next commit, honeykrisp results:

   Totals from 3589 (6.64% of 54019) affected shaders:
   MaxWaves: 3598144 -> 3598400 (+0.01%); split: +0.02%, -0.01%
   Instrs: 1445830 -> 1332394 (-7.85%)
   CodeSize: 10696356 -> 9742130 (-8.92%)
   Fills: 721 -> 723 (+0.28%); split: -0.14%, +0.42%
   Scratch: 3980 -> 3968 (-0.30%)
   ALU: 1156426 -> 1043198 (-9.79%)
   FSCIB: 1156426 -> 1043196 (-9.79%)
   IC: 267202 -> 267166 (-0.01%)
   GPRs: 208765 -> 208712 (-0.03%); split: -0.16%, +0.14%
   Uniforms: 683643 -> 683677 (+0.00%); split: -0.01%, +0.01%
   Preamble instrs: 1163325 -> 1159314 (-0.34%)

control results alone:

   Totals:
   Instrs: 110168 -> 107171 (-2.72%)

   Totals from 71 (22.26% of 319) affected shaders:
   Instrs: 48895 -> 45898 (-6.13%)

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Marek Olšák <maraeo@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35989>
This commit is contained in:
Alyssa Rosenzweig 2025-07-07 15:10:31 -04:00 committed by Marge Bot
parent 042adf3cc5
commit fc95397957

View file

@ -202,7 +202,6 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *instr, UNUSED void *cb_data)
nir_def *s1 = nir_ssa_for_alu_src(b, instr, 1);
bool max = instr->op == nir_op_fmax;
nir_def *iminmax = max ? nir_imax(b, s0, s1) : nir_imin(b, s0, s1);
/* Lower the fmin/fmax to a no_signed_zero fmin/fmax. This ensures that
* nir_lower_alu is idempotent, and allows the backend to implement
@ -212,7 +211,39 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *instr, UNUSED void *cb_data)
nir_def *fminmax = max ? nir_fmax(b, s0, s1) : nir_fmin(b, s0, s1);
b->fp_fast_math = instr->fp_fast_math;
lowered = nir_bcsel(b, nir_feq(b, s0, s1), iminmax, fminmax);
/* If we have a constant source, we can usually optimize */
if (s0->num_components == 1 && s0->bit_size == 32) {
for (unsigned i = 0; i < 2 && lowered == NULL; ++i) {
if (!nir_src_is_const(instr->src[i].src))
continue;
uint32_t x = nir_alu_src_as_uint(instr->src[i]);
bool pos_zero = x == fui(+0.0);
bool neg_zero = x == fui(-0.0);
nir_def *zero = i == 0 ? s0 : s1;
nir_def *other = i == 0 ? s1 : s0;
if (!pos_zero && !neg_zero) {
/* The lowering is only required when both sources are zero, so
* if we have a nonzero constant source, skip the lowering.
*/
lowered = fminmax;
} else if (pos_zero && max) {
/* max(x, +0.0) = +0.0 < x ? x : +0.0 */
lowered = nir_bcsel(b, nir_flt(b, zero, other), other, zero);
} else if (neg_zero && !max) {
/* min(x, -0.0) = x < -0.0 ? x : -0.0 */
lowered = nir_bcsel(b, nir_flt(b, other, zero), other, zero);
}
}
}
/* Fall back on the emulation */
if (!lowered) {
nir_def *iminmax = max ? nir_imax(b, s0, s1) : nir_imin(b, s0, s1);
lowered = nir_bcsel(b, nir_feq(b, s0, s1), iminmax, fminmax);
}
break;
}