diff --git a/.pick_status.json b/.pick_status.json index e8c2f7d7838..7c376843d9e 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3444,7 +3444,7 @@ "description": "nir: fix nir_round_int_to_float for fp16", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/compiler/nir/nir_conversion_builder.h b/src/compiler/nir/nir_conversion_builder.h index e24dd1cbb18..692c5dfedb2 100644 --- a/src/compiler/nir/nir_conversion_builder.h +++ b/src/compiler/nir/nir_conversion_builder.h @@ -24,6 +24,7 @@ #ifndef NIR_CONVERSION_BUILDER_H #define NIR_CONVERSION_BUILDER_H +#include "util/half_float.h" #include "util/u_math.h" #include "nir_builder.h" #include "nir_builtin_builder.h" @@ -162,6 +163,29 @@ nir_round_int_to_float(nir_builder *b, nir_def *src, } UNREACHABLE("unexpected rounding mode"); } else { + /* For conversions to FP16 we need to clamp the input against the fp16 * max value when rounding towards zero or down. The reason for that is * that for integer values outside of FP16 finite value range we could * get Infinity, which would be incorrect rounding in those cases. * * Furthermore, we only need to do the clamping for integers of at least * 32 bits, because the lowering below will already clamp 16 bit integers * correctly. * * This isn't a problem for FP32 or FP64 floats as integers can't exceed * the finite value ranges. 
+ */ + if (dest_bit_size == 16 && src->bit_size >= 32) { + switch (round) { + case nir_rounding_mode_rtz: + case nir_rounding_mode_rd: + src = nir_umin_imm(b, src, FP16_MAX_F); + break; + default: + break; + } + } + nir_def *mantissa_bit_size = nir_imm_int(b, mantissa_bits); nir_def *msb = nir_imax(b, nir_ufind_msb(b, src), mantissa_bit_size); nir_def *bits_to_lose = nir_isub(b, msb, mantissa_bit_size); diff --git a/src/util/half_float.h b/src/util/half_float.h index 6961e1ed618..cc49251d229 100644 --- a/src/util/half_float.h +++ b/src/util/half_float.h @@ -43,6 +43,7 @@ extern "C" { #define FP16_ONE ((uint16_t) 0x3c00) #define FP16_ZERO ((uint16_t) 0) +#define FP16_MAX_F 65504.0 uint16_t _mesa_float_to_half_slow(float val); float _mesa_half_to_float_slow(uint16_t val);