mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 13:58:04 +02:00
nir/lower_double_ops: handle signed zero with min/max
Ensure the following identities hold, to match IEEE-754-2019 and upcoming NIR:

    min(-0, +0) = -0
    min(+0, -0) = -0
    max(-0, +0) = +0
    max(+0, -0) = +0

NVK uses this lowering. In a simple compute shader using fmin64 on an SSBO with signed-zero preservation required, testing the effect of this patch, the instruction count goes from 47 to 52. Obviously I'm not thrilled by that, but I also couldn't find any obvious way of mitigating the issue. (Maybe NVIDIA has special hardware support here. By instruction count, lowering all the way to int64 is a loss, though I don't know how to count cycles on NVIDIA.)

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30075>
This commit is contained in:
parent
26de3d5366
commit
4ab3d95c11
1 changed file with 17 additions and 0 deletions
|
|
@@ -499,6 +499,23 @@ lower_minmax(nir_builder *b, nir_op cmp, nir_def *src0, nir_def *src1)
|
|||
nir_def *cmp_res = nir_build_alu2(b, cmp, src0, src1);
|
||||
b->exact = false;
|
||||
nir_def *take_src0 = nir_ior(b, src1_is_nan, cmp_res);
|
||||
|
||||
/* IEEE-754-2019 requires that fmin/fmax compare -0 < 0, but -0 and 0 are
|
||||
* indistinguishable for flt/fge. So, we fix up signed zeroes.
|
||||
*/
|
||||
if (nir_is_float_control_signed_zero_preserve(b->fp_fast_math, 64)) {
|
||||
nir_def *src0_is_negzero = nir_ieq_imm(b, src0, 1ull << 63);
|
||||
nir_def *src1_is_poszero = nir_ieq_imm(b, src1, 0x0);
|
||||
nir_def *neg_pos_zero = nir_iand(b, src0_is_negzero, src1_is_poszero);
|
||||
|
||||
if (cmp == nir_op_flt) {
|
||||
take_src0 = nir_ior(b, take_src0, neg_pos_zero);
|
||||
} else {
|
||||
assert(cmp == nir_op_fge);
|
||||
take_src0 = nir_iand(b, take_src0, nir_inot(b, neg_pos_zero));
|
||||
}
|
||||
}
|
||||
|
||||
return nir_bcsel(b, take_src0, src0, src1);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue