/*
 * Copyright 2025 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "float8.h"

#include <math.h>
#include <stdbool.h>

#include "bitscan.h"
#include "u_math.h"

/* E4M3FN: 1 sign bit, 4 exponent bits (bias 7), 3 mantissa bits.
 * There is no infinity; only exponent == 0xf with mantissa == 0x7 is NaN.
 */
#define E4M3FN_NAN 0xff
#define E4M3FN_MAX 0x7e

/* E5M2: 1 sign bit, 5 exponent bits (bias 15), 2 mantissa bits.
 * Exponent == 0x1f encodes infinity (mantissa 0) or NaN (mantissa != 0).
 */
#define E5M2_NAN 0xfe
#define E5M2_MAX 0x7b
#define E5M2_INF 0x7c

/**
 * Convert a 32-bit float to E4M3FN, rounding to nearest with ties to even.
 *
 * NaN and every value whose magnitude still exceeds the largest finite
 * E4M3FN value after rounding are returned as E4M3FN_NAN. Magnitudes too
 * small to round to the smallest denormal become a zero that keeps the
 * sign of the input.
 *
 * The code treats fui(val) as the IEEE-754 binary32 bit pattern of val.
 */
uint8_t
_mesa_float_to_e4m3fn(float val)
{
   /* This is a finite only format, out of range values (after rounding)
    * are converted to NaN. 464 is halfway between the largest finite value
    * (448) and the next step (480); the tie itself rounds to 448.
    */
   if (fabsf(val) > 464.0f || isnan(val))
      return E4M3FN_NAN;

   bool s = fui(val) & 0x80000000;
   /* Exponent re-biased from the float bias (127) to the E4M3 bias (7). */
   int e = ((fui(val) >> 23) & 0xff) - 127 + 7;
   uint32_t m = fui(val) & 0x7fffff;

   uint8_t res = s ? 0x80 : 0;

   /* Zero, underflow: the magnitude is below half of the smallest denormal. */
   if (e < -3)
      return res;

   bool is_denorm = e <= 0;

   bool round_up = false;

   if (is_denorm) {
      /* Make the implicit leading one explicit and shift the mantissa into
       * denormal position, remembering the bits that fall off as sticky.
       */
      unsigned offset = 1 - e;
      round_up |= m & ((1u << offset) - 1);
      m = (m | 0x800000) >> offset;
   }

   /* Bit 19 is the round bit. Round up when it is set and either a lower
    * (sticky) bit is set or the kept LSB (bit 20) is odd.
    */
   round_up |= m & 0x17ffff;
   if ((m & 0x080000) && round_up) {
      m += 0x100000;
      if (m & 0x800000) {
         /* Mantissa overflowed, carry into the exponent. */
         m = 0;
         e += 1;
      }
   }

   /* Test the exponent after rounding rather than is_denorm: a denormal
    * with e == 0 can be carried up to the smallest normal (e == 1), and
    * that exponent has to be encoded or the result collapses to zero.
    */
   if (e > 0)
      res |= (e << 3);

   res |= (m >> 20);

   return res;
}

/**
 * Convert a 32-bit float to E4M3FN, clamping values beyond +/-448 to the
 * largest finite encoding of the matching sign instead of producing NaN.
 *
 * NaN inputs fail both comparisons and are passed on to
 * _mesa_float_to_e4m3fn().
 */
uint8_t
_mesa_float_to_e4m3fn_sat(float val)
{
   if (val > 448.0f)
      return E4M3FN_MAX;
   else if (val < -448.0f)
      return 0x80 | E4M3FN_MAX;
   else
      return _mesa_float_to_e4m3fn(val);
}

/**
 * Convert an E4M3FN value to a 32-bit float.
 *
 * The NaN encoding (exponent 0xf, mantissa 0x7) yields the float with bit
 * pattern 0xffc00000 whatever the input sign bit is. Every other encoding
 * is converted exactly.
 *
 * The code treats uif(bits) as the float whose binary32 bit pattern is bits.
 */
float
_mesa_e4m3fn_to_float(uint8_t val)
{
   bool s = val & 0x80;
   uint32_t e = (val >> 3) & 0xf;
   uint32_t m = val & 0x7;

   if (e == 0xf && m == 0x7)
      return uif(0xffc00000);

   uint32_t res = s ? 0x80000000 : 0;

   if (e == 0 && m == 0) {
      /* Zero. */
   } else if (e == 0) {
      /* Denorm: normalize by shifting the highest set mantissa bit out of
       * the 3-bit field and lowering the exponent by the same amount.
       * NOTE(review): relies on util_last_bit() returning the 1-based index
       * of the highest set bit.
       */
      unsigned shift = (4 - util_last_bit(m));
      res |= (127 - 6 - shift) << 23;
      res |= ((m << shift) & 0x7) << (23 - 3);
   } else {
      res |= (e + (127 - 7)) << 23;
      res |= m << (23 - 3);
   }

   return uif(res);
}

/**
 * Convert a 32-bit float to E5M2, rounding to nearest with ties to even.
 *
 * NaN is returned as E5M2_NAN. Magnitudes of 61440 and above (infinity
 * included) become infinity with the sign of the input. Magnitudes too
 * small to round to the smallest denormal become a zero that keeps the
 * sign of the input.
 *
 * The code treats fui(val) as the IEEE-754 binary32 bit pattern of val.
 */
uint8_t
_mesa_float_to_e5m2(float val)
{
   bool s = fui(val) & 0x80000000;
   uint8_t res = s ? 0x80 : 0;

   /* 61440 is halfway between the largest finite value (57344) and 65536;
    * the largest finite mantissa is odd, so the tie rounds to infinity.
    */
   if (isnan(val))
      return E5M2_NAN;
   else if (fabsf(val) >= 61440.0f)
      return res | E5M2_INF;

   /* Exponent re-biased from the float bias (127) to the E5M2 bias (15). */
   int e = ((fui(val) >> 23) & 0xff) - 127 + 15;
   uint32_t m = fui(val) & 0x7fffff;

   /* Zero, underflow: the magnitude is below half of the smallest denormal. */
   if (e < -2)
      return res;

   bool is_denorm = e <= 0;

   bool round_up = false;

   if (is_denorm) {
      /* Make the implicit leading one explicit and shift the mantissa into
       * denormal position, remembering the bits that fall off as sticky.
       */
      unsigned offset = 1 - e;
      round_up |= m & ((1u << offset) - 1);
      m = (m | 0x800000) >> offset;
   }

   /* Bit 20 is the round bit. Round up when it is set and either a lower
    * (sticky) bit is set or the kept LSB (bit 21) is odd.
    */
   round_up |= m & 0x2fffff;
   if ((m & 0x100000) && round_up) {
      m += 0x200000;
      if (m & 0x800000) {
         /* Mantissa overflowed, carry into the exponent. */
         m = 0;
         e += 1;
      }
   }

   /* Test the exponent after rounding rather than is_denorm: a denormal
    * with e == 0 can be carried up to the smallest normal (e == 1), and
    * that exponent has to be encoded or the result collapses to zero.
    */
   if (e > 0)
      res |= (e << 2);

   res |= (m >> 21);

   return res;
}

/**
 * Convert a 32-bit float to E5M2, clamping values beyond +/-57344 to the
 * largest finite encoding of the matching sign instead of producing
 * infinity.
 *
 * NaN inputs fail both comparisons and are passed on to
 * _mesa_float_to_e5m2().
 */
uint8_t
_mesa_float_to_e5m2_sat(float val)
{
   if (val > 57344.0f)
      return E5M2_MAX;
   else if (val < -57344.0f)
      return 0x80 | E5M2_MAX;
   else
      return _mesa_float_to_e5m2(val);
}

/**
 * Convert an E5M2 value to a 32-bit float.
 *
 * Any NaN encoding (exponent 0x1f, mantissa != 0) yields the float with bit
 * pattern 0xffc00000 whatever the input sign bit is. Infinity keeps its
 * sign, and every finite encoding is converted exactly.
 *
 * The code treats uif(bits) as the float whose binary32 bit pattern is bits.
 */
float
_mesa_e5m2_to_float(uint8_t val)
{
   bool s = val & 0x80;
   uint32_t e = (val >> 2) & 0x1f;
   uint32_t m = val & 0x3;

   if (e == 0x1f && m != 0)
      return uif(0xffc00000);

   uint32_t res = s ? 0x80000000 : 0;

   if (e == 0x1f) {
      /* Infinity. */
      res |= 0x7f800000;
   } else if (e == 0 && m == 0) {
      /* Zero. */
   } else if (e == 0) {
      /* Denorm: normalize by shifting the highest set mantissa bit out of
       * the 2-bit field and lowering the exponent by the same amount.
       * NOTE(review): relies on util_last_bit() returning the 1-based index
       * of the highest set bit.
       */
      unsigned shift = (3 - util_last_bit(m));
      res |= (127 - 14 - shift) << 23;
      res |= ((m << shift) & 0x3) << (23 - 2);
   } else {
      res |= (e + (127 - 15)) << 23;
      res |= m << (23 - 2);
   }

   return uif(res);
}