diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 9d159306563..49814215a45 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -927,10 +927,29 @@ opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], False, "",
 opcode("fdph_replicated", 0, tfloat, [3, 4], [tfloat, tfloat], False, "",
        "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
 
-binop("fmin", tfloat, _2src_commutative + associative, "fmin(src0, src1)")
+# The C fmin/fmax functions have implementation-defined behaviour for signed
+# zeroes. However, SPIR-V requires:
+#
+#    fmin(-0, +0) = -0
+#    fmax(+0, -0) = +0
+#
+# The NIR opcodes match SPIR-V. Furthermore, the NIR opcodes are commutative,
+# so we must also ensure:
+#
+#    fmin(+0, -0) = -0
+#    fmax(-0, +0) = +0
+#
+# To implement the constant folding, when the sources are equal, we take the
+# min/max of the bit patterns, which orders the signed zeroes while preserving
+# all other values.
+for op, macro in [("fmin", "MIN2"), ("fmax", "MAX2")]:
+    binop(op, tfloat, _2src_commutative + associative,
+          "bit_size == 64 ? " +
+          f"(src0 == src1 ? uid({macro}((int64_t)dui(src0), (int64_t)dui(src1))) : {op}(src0, src1)) : " +
+          f"(src0 == src1 ? uif({macro}((int32_t)fui(src0), (int32_t)fui(src1))) : {op}f(src0, src1))")
+
 binop("imin", tint, _2src_commutative + associative, "MIN2(src0, src1)")
 binop("umin", tuint, _2src_commutative + associative, "MIN2(src0, src1)")
-binop("fmax", tfloat, _2src_commutative + associative, "fmax(src0, src1)")
 binop("imax", tint, _2src_commutative + associative, "MAX2(src0, src1)")
 binop("umax", tuint, _2src_commutative + associative, "MAX2(src0, src1)")
 
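
To make the bit-pattern trick concrete, here is a small standalone C sketch of the 32-bit folding expression the loop above generates. The local fui/uif helpers and the MIN2 macro mimic Mesa's utility functions of the same names so the sketch compiles on its own; fold_fmin itself is illustrative and not code from the tree.

#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <string.h>

/* Local stand-ins for Mesa's fui()/uif() bit-cast helpers and MIN2 macro. */
static uint32_t fui(float f) { uint32_t u; memcpy(&u, &f, sizeof u); return u; }
static float uif(uint32_t u) { float f; memcpy(&f, &u, sizeof f); return f; }
#define MIN2(a, b) ((a) < (b) ? (a) : (b))

/* Illustrative 32-bit constant folder matching the generated expression. */
static float fold_fmin(float src0, float src1)
{
   /* src0 == src1 is true for (+0, -0), where C fminf() may return either
    * zero. Reinterpreted as int32_t, -0.0f is 0x80000000 (INT32_MIN) and
    * +0.0f is 0, so the smaller signed bit pattern is always -0.0f. NaNs
    * compare unequal and fall through to fminf(), and equal non-zero values
    * have identical bit patterns, so only the signed-zero case is affected.
    */
   return src0 == src1
      ? uif(MIN2((int32_t)fui(src0), (int32_t)fui(src1)))
      : fminf(src0, src1);
}

int main(void)
{
   /* Both operand orders fold to -0.0f, as SPIR-V and commutativity require. */
   assert(signbit(fold_fmin(-0.0f, +0.0f)));
   assert(signbit(fold_fmin(+0.0f, -0.0f)));
   /* Ordinary values are untouched. */
   assert(fold_fmin(2.0f, 2.0f) == 2.0f);
   assert(fold_fmin(1.0f, 3.0f) == 1.0f);
   return 0;
}

The same reasoning applies to fmax with MAX2 (the larger signed bit pattern is +0.0f) and to the 64-bit path, which uses the dui/uid double bit-cast helpers instead.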