diff --git a/src/compiler/glsl/float32.glsl b/src/compiler/glsl/float32.glsl
new file mode 100644
index 00000000000..70eb998fb1c
--- /dev/null
+++ b/src/compiler/glsl/float32.glsl
@@ -0,0 +1,629 @@
+/*
+ * The implementations contained in this file are heavily based on the
+ * implementations found in the Berkeley SoftFloat library. As such, they are
+ * licensed under the same 3-clause BSD license:
+ *
+ * License for Berkeley SoftFloat Release 3e
+ *
+ * John R. Hauser
+ * 2018 January 20
+ *
+ * The following applies to the whole of SoftFloat Release 3e as well as to
+ * each source file individually.
+ *
+ * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the
+ * University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions, and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions, and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#version 450
+#extension GL_ARB_shader_bit_encoding : enable
+#extension GL_EXT_shader_integer_mix : enable
+
+/* Enable this just to suppress warnings about __ */
+#extension GL_EXT_spirv_intrinsics : enable
+
+#pragma warning(off)
+
+/* Software IEEE floating-point rounding mode.
+ * GLSL spec section "4.7.1 Range and Precision":
+ * The rounding mode cannot be set and is undefined.
+ * But here, we are able to define the rounding mode at compile time.
+ */
+#define FLOAT_ROUND_NEAREST_EVEN    0
+#define FLOAT_ROUND_TO_ZERO         1
+#define FLOAT_ROUND_DOWN            2
+#define FLOAT_ROUND_UP              3
+#define FLOAT_ROUNDING_MODE         FLOAT_ROUND_NEAREST_EVEN
+
+/* Relax propagation of NaN. Binary operations with a NaN source will still
+ * produce a NaN result, but it won't follow strict IEEE rules.
+ */
+#define RELAXED_NAN_PROPAGATION
+
+/* Returns the number of leading 0 bits before the most-significant 1 bit of
+ * `a'. If `a' is zero, 32 is returned.
+ */
+int
+__countLeadingZeros32(uint a)
+{
+   return 31 - findMSB(a);
+}
+
+/* If a shader is in the soft-fp32 path, it almost certainly has register
+ * pressure problems. Choose a method to exchange two values that does not
+ * require a temporary.
+ */
+#define EXCHANGE(a, b) \
+   do { \
+      a ^= b; \
+      b ^= a; \
+      a ^= b; \
+   } while (false)
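+
+/* e.g. EXCHANGE(aFrac, bFrac) swaps the two operands' significands in place
+ * using the XOR trick. Note the arguments must be distinct lvalues:
+ * EXCHANGE(x, x) would zero x.
+ */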
+
+/* Shifts the 32-bit value `a' right by the number of bits given in `count'.
+ * If any nonzero bits are shifted off, they are "jammed" into the least
+ * significant bit of the result by setting the least significant bit to 1.
+ * The value of `count' can be arbitrarily large; in particular, if `count' is
+ * greater than 32, the result will be either 0 or 1, depending on whether `a'
+ * is zero or nonzero.
+ */
+uint
+__shift32RightJamming(uint a, int count)
+{
+   int negCount = (-count) & 31;
+
+   return mix(uint(a != 0),
+              (a >> count) | uint(a << negCount != 0u && count != 0),
+              count < 32);
+}
+
+/* Packs the sign `zSign', exponent `zExp', and significand `zFrac' into a
+ * single-precision floating-point value, returning the result. After being
+ * shifted into the proper positions, the three fields are simply added
+ * together to form the result. This means that any integer portion of `zFrac'
+ * will be added into the exponent. Since a properly normalized significand
+ * will have an integer portion equal to 1, the `zExp' input should be 1 less
+ * than the desired result exponent whenever `zFrac' is a complete, normalized
+ * significand.
+ */
+uint
+__packFloat32(uint zSign, int zExp, uint zFrac)
+{
+   return zSign + (uint(zExp)<<23) + zFrac;
+}
+
+/* Takes an abstract floating-point value having sign `zSign', exponent
+ * `zExp', and significand `zFrac', and returns the proper single-precision
+ * floating-point value corresponding to the abstract input. Ordinarily, the
+ * abstract value is simply rounded and packed into the single-precision
+ * format. If the abstract value is too large, an infinity or maximal finite
+ * value is returned, depending on the rounding mode. If the abstract value is
+ * too small, it is rounded to a subnormal number.
+ * The input significand `zFrac' has its binary point between bits 30 and 29,
+ * which is 7 bits to the left of the usual location. In the usual case that
+ * `zFrac' is normalized, `zExp' must be 1 less than the "true" floating-point
+ * exponent. The handling of underflow and overflow follows the IEEE Standard
+ * for Floating-Point Arithmetic.
+ */
+uint
+__roundAndPackFloat32(uint zSign, int zExp, uint zFrac)
+{
+   bool roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;
+   uint roundIncrement = 0x40u;
+
+   if (!roundNearestEven) {
+      if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) {
+         roundIncrement = 0u;
+      } else {
+         roundIncrement = 0x7Fu;
+         if (zSign != 0u) {
+            if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)
+               roundIncrement = 0u;
+         } else {
+            if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN)
+               roundIncrement = 0u;
+         }
+      }
+   }
+
+   uint roundBits = zFrac & 0x7Fu;
+   if (0xFD <= zExp) {
+      if ((0xFD < zExp) ||
+          ((zExp == 0xFD) && int(zFrac + roundIncrement) < 0))
+         return __packFloat32(zSign, 0xFF, 0u) - uint(roundIncrement == 0u);
+   }
+   if (zExp < 0) {
+      zFrac = __shift32RightJamming(zFrac, -zExp);
+      zExp = 0;
+      roundBits = zFrac & 0x7Fu;
+   }
+   zFrac = (zFrac + roundIncrement)>>7;
+   zFrac &= ~uint(((roundBits ^ 0x40) == 0) && roundNearestEven);
+
+   return __packFloat32(zSign, mix(zExp, 0, zFrac == 0u), zFrac);
+}
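+
+/* e.g. __roundAndPackFloat32(0u, 126, 0x60000000u) is 0x3FC00000u (1.5):
+ * 0x60000000 is 1.5 scaled to the shifted format, (0x60000000 + 0x40)>>7
+ * gives a significand of 0xC00000, and its integer bit 0x800000 carries into
+ * the exponent field when packed, which is the "1 less than true" rule.
+ */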
+
+/* Absolute value of a Float32: clear the sign bit. */
+uint
+__fabs32(uint a)
+{
+   return a & 0x7FFFFFFFu;
+}
+
+/* Returns true if the single-precision floating-point value `a' is a NaN;
+ * otherwise returns false.
+ */
+bool
+__is_nan(uint a)
+{
+   /* It should be safe to use the native single-precision isnan() regardless
+    * of rounding mode or denorm flushing settings.
+    */
+   return isnan(uintBitsToFloat(a));
+}
+
+/* Negate a Float32: toggle the sign bit. */
+uint
+__fneg32(uint a)
+{
+   return a ^ (1u << 31);
+}
+
+uint
+__fsign32(uint a)
+{
+   return mix((a & 0x80000000u) | floatBitsToUint(1.0), 0u, (a << 1) == 0u);
+}
+
+/* Returns the fraction bits of the single-precision floating-point value `a'. */
+uint
+__extractFloat32Frac(uint a)
+{
+   return a & 0x7FFFFF;
+}
+
+/* Returns the exponent bits of the single-precision floating-point value `a'. */
+int
+__extractFloat32Exp(uint a)
+{
+   return int((a>>23) & 0xFFu);
+}
+
+bool
+__feq32_nonnan(uint a, uint b)
+{
+   /* Bit-equal, or both operands are zeros of either sign. */
+   return (a == b) || (((a | b)<<1) == 0u);
+}
+
+/* Returns true if the single-precision floating-point value `a' is equal to
+ * the corresponding value `b', and false otherwise. The comparison is
+ * performed according to the IEEE Standard for Floating-Point Arithmetic.
+ */
+bool
+__feq32(uint a, uint b)
+{
+   if (__is_nan(a) || __is_nan(b))
+      return false;
+
+   return __feq32_nonnan(a, b);
+}
+
+/* Returns true if the single-precision floating-point value `a' is not equal
+ * to the corresponding value `b', and false otherwise. The comparison is
+ * performed according to the IEEE Standard for Floating-Point Arithmetic.
+ */
+bool
+__fneu32(uint a, uint b)
+{
+   if (__is_nan(a) || __is_nan(b))
+      return true;
+
+   return !__feq32_nonnan(a, b);
+}
+
+/* Returns the sign bit of the single-precision floating-point value `a'. */
+uint
+__extractFloat32Sign(uint a)
+{
+   return a & 0x80000000u;
+}
+
+bool
+__flt32_nonnan(uint a, uint b)
+{
+   /* IEEE 754 floating point numbers are specifically designed so that, with
+    * two exceptions, values can be compared by bit-casting to signed integers
+    * with the same number of bits.
+    *
+    * From https://en.wikipedia.org/wiki/IEEE_754-1985#Comparing_floating-point_numbers:
+    *
+    *    When comparing as 2's-complement integers: If the sign bits differ,
+    *    the negative number precedes the positive number, so 2's complement
+    *    gives the correct result (except that negative zero and positive zero
+    *    should be considered equal). If both values are positive, the 2's
+    *    complement comparison again gives the correct result. Otherwise (two
+    *    negative numbers), the correct FP ordering is the opposite of the 2's
+    *    complement ordering.
+    *
+    * The logic implied by the above quotation is:
+    *
+    *    !both_are_zero(a, b) && (both_negative(a, b) ? a > b : a < b)
+    *
+    * This is equivalent to
+    *
+    *    fneu(a, b) && (both_negative(a, b) ? a >= b : a < b)
+    *
+    *    fneu(a, b) && (both_negative(a, b) ? !(a < b) : a < b)
+    *
+    *    fneu(a, b) && ((both_negative(a, b) && !(a < b)) ||
+    *                   (!both_negative(a, b) && (a < b)))
+    *
+    * (A!B) || (!AB) is (A xor B), which is implemented here using !=.
+    *
+    *    fneu(a, b) && (both_negative(a, b) != (a < b))
+    */
+   bool lt = int(a) < int(b);
+   bool both_negative = (a & b & 0x80000000u) != 0;
+
+   return !__feq32_nonnan(a, b) && (lt != both_negative);
+}
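+
+/* e.g. for a = -1.0 (0xBF800000u) and b = 1.0 (0x3F800000u): the values are
+ * not equal, int(a) < int(b) is true (negative vs. positive 2's complement),
+ * and both_negative is false, so lt != both_negative and __flt32_nonnan()
+ * returns true as expected.
+ */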
+
+bool
+__flt32_nonnan_minmax(uint a, uint b)
+{
+   /* See __flt32_nonnan. For implementing fmin/fmax it is fine to treat -0
+    * as less than +0, so the implied logic is a bit simpler:
+    *
+    *    both_negative(a, b) ? a > b : a < b
+    *
+    * If a == b, it doesn't matter what we return, so that's equivalent to:
+    *
+    *    both_negative(a, b) ? a >= b : a < b
+    *
+    *    both_negative(a, b) ? !(a < b) : a < b
+    *
+    *    both_negative(a, b) ^ (a < b)
+    *
+    * XOR is again implemented using !=.
+    */
+   bool lt = int(a) < int(b);
+   bool both_negative = (a & b & 0x80000000u) != 0;
+
+   return (lt != both_negative);
+}
+
+/* Returns true if the single-precision floating-point value `a' is less than
+ * the corresponding value `b', and false otherwise. The comparison is
+ * performed according to the IEEE Standard for Floating-Point Arithmetic.
+ */
+bool
+__flt32(uint a, uint b)
+{
+   /* This weird layout matters. Doing the "obvious" thing results in extra
+    * flow control being inserted to implement the short-circuit evaluation
+    * rules. Flow control is bad!
+    */
+   bool x = !__is_nan(a);
+   bool y = !__is_nan(b);
+   bool z = __flt32_nonnan(a, b);
+
+   return (x && y && z);
+}
+
+/* Returns true if the single-precision floating-point value `a' is greater
+ * than or equal to the corresponding value `b', and false otherwise. The
+ * comparison is performed according to the IEEE Standard for Floating-Point
+ * Arithmetic.
+ */
+bool
+__fge32(uint a, uint b)
+{
+   /* This weird layout matters. Doing the "obvious" thing results in extra
+    * flow control being inserted to implement the short-circuit evaluation
+    * rules. Flow control is bad!
+    */
+   bool x = !__is_nan(a);
+   bool y = !__is_nan(b);
+   bool z = !__flt32_nonnan(a, b);
+
+   return (x && y && z);
+}
+
+uint
+__fsat32(uint a)
+{
+   /* fsat(NaN) should be zero. */
+   if (__is_nan(a) || int(a) < 0)
+      return 0u;
+
+   /* IEEE 754 floating point numbers are specifically designed so that, with
+    * two exceptions, values can be compared by bit-casting to signed integers
+    * with the same number of bits.
+    *
+    * From https://en.wikipedia.org/wiki/IEEE_754-1985#Comparing_floating-point_numbers:
+    *
+    *    When comparing as 2's-complement integers: If the sign bits differ,
+    *    the negative number precedes the positive number, so 2's complement
+    *    gives the correct result (except that negative zero and positive zero
+    *    should be considered equal). If both values are positive, the 2's
+    *    complement comparison again gives the correct result. Otherwise (two
+    *    negative numbers), the correct FP ordering is the opposite of the 2's
+    *    complement ordering.
+    *
+    * We know that both values are not negative, and we know that at least one
+    * value is not zero. Therefore, we can just use the 2's complement
+    * comparison ordering.
+    */
+   if (floatBitsToUint(1.0) < a)
+      return floatBitsToUint(1.0);
+
+   return a;
+}
+
+/* Takes an abstract floating-point value having sign `zSign', exponent
+ * `zExp', and significand `zFrac', and returns the proper single-precision
+ * floating-point value corresponding to the abstract input. This routine is
+ * just like `__roundAndPackFloat32' except that the input significand has
+ * fewer bits and does not have to be normalized. In all cases, `zExp' must be
+ * 1 less than the "true" floating-point exponent.
+ */
+uint
+__normalizeRoundAndPackFloat32(uint zSign,
+                               int zExp,
+                               uint zFrac)
+{
+   int shiftCount;
+
+   shiftCount = __countLeadingZeros32(zFrac) - 1;
+   return __roundAndPackFloat32(zSign, zExp - shiftCount, zFrac<<shiftCount);
+}
+
+/* Propagates NaN and infinity operands for addition. With
+ * RELAXED_NAN_PROPAGATION the native add can be used: NaN and infinity
+ * encodings are unaffected by denorm flushing, and any subnormal operand
+ * that gets flushed to zero still produces the correct infinity.
+ */
+uint
+__propagateFloat32NaNInfAdd(uint a, uint b)
+{
+   return floatBitsToUint(uintBitsToFloat(a) + uintBitsToFloat(b));
+}
+
+/* Propagates NaN, infinity, and zero operands for multiplication using the
+ * native multiply. Only called when the result does not depend on a possibly
+ * flushed subnormal operand.
+ */
+uint
+__propagateFloat32NaNInfMul(uint a, uint b)
+{
+   return floatBitsToUint(uintBitsToFloat(a) * uintBitsToFloat(b));
+}
+
+/* Returns the result of adding the single-precision floating-point values
+ * `a' and `b'. The operation is performed according to the IEEE Standard for
+ * Floating-Point Arithmetic.
+ */
+uint
+__fadd32(uint a, uint b)
+{
+   uint aFrac = __extractFloat32Frac(a);
+   uint bFrac = __extractFloat32Frac(b);
+   int aExp = __extractFloat32Exp(a);
+   int bExp = __extractFloat32Exp(b);
+   uint aSign = __extractFloat32Sign(a);
+   uint bSign = __extractFloat32Sign(b);
+   int expDiff = aExp - bExp;
+
+   if (aSign == bSign) {
+      uint zFrac;
+      int zExp;
+
+      aFrac <<= 6;
+      bFrac <<= 6;
+      if (expDiff == 0) {
+         if (aExp == 0xFF)
+            return __propagateFloat32NaNInfAdd(a, b);
+         if (aExp == 0)
+            return __packFloat32(aSign, 0, (aFrac + bFrac)>>6);
+         zFrac = 0x40000000 + aFrac + bFrac;
+         zExp = aExp;
+      } else {
+         if (expDiff < 0) {
+            EXCHANGE(aFrac, bFrac);
+            EXCHANGE(aExp, bExp);
+         }
+
+         if (aExp == 0xFF)
+            return __propagateFloat32NaNInfAdd(a, b);
+
+         expDiff = mix(abs(expDiff), abs(expDiff) - 1, bExp == 0);
+         bFrac = mix(bFrac | 0x20000000u, bFrac, bExp == 0);
+         bFrac = __shift32RightJamming(bFrac, expDiff);
+         zExp = aExp;
+
+         aFrac |= 0x20000000;
+         zFrac = (aFrac + bFrac)<<1;
+         --zExp;
+         if (int(zFrac) < 0) {
+            zFrac = aFrac + bFrac;
+            ++zExp;
+         }
+      }
+      return __roundAndPackFloat32(aSign, zExp, zFrac);
+   } else {
+      int zExp;
+
+      aFrac <<= 7;
+      bFrac <<= 7;
+      if (expDiff != 0) {
+         uint zFrac;
+
+         if (expDiff < 0) {
+            EXCHANGE(aFrac, bFrac);
+            EXCHANGE(aExp, bExp);
+            aSign ^= 0x80000000u;
+         }
+         if (aExp == 0xFF)
+            return __propagateFloat32NaNInfAdd(a, b);
+
+         expDiff = mix(abs(expDiff), abs(expDiff) - 1, bExp == 0);
+         bFrac = mix(bFrac | 0x40000000u, bFrac, bExp == 0);
+         bFrac = __shift32RightJamming(bFrac, expDiff);
+         aFrac |= 0x40000000;
+         zFrac = aFrac - bFrac;
+         zExp = aExp;
+         --zExp;
+         return __normalizeRoundAndPackFloat32(aSign, zExp, zFrac);
+      }
+      if (aExp == 0xFF)
+         return __propagateFloat32NaNInfAdd(a, b);
+      bExp = mix(bExp, 1, aExp == 0);
+      aExp = mix(aExp, 1, aExp == 0);
+
+      uint zFrac;
+      uint sign_of_difference = 0;
+      if (bFrac <= aFrac) {
+         /* It is possible that zFrac may be zero after this. */
+         zFrac = aFrac - bFrac;
+      } else {
+         zFrac = bFrac - aFrac;
+         sign_of_difference = 0x80000000;
+      }
+      zExp = mix(bExp, aExp, sign_of_difference == 0u);
+      aSign ^= sign_of_difference;
+      uint retval_0 = __packFloat32(uint(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u);
+      uint retval_1 = __normalizeRoundAndPackFloat32(aSign, zExp, zFrac);
+      return mix(retval_0, retval_1, zFrac != 0u);
+   }
+}
+
+/* Normalizes the subnormal single-precision floating-point value represented
+ * by the denormalized significand `aFrac'. The normalized exponent and
+ * significand are stored at the locations pointed to by `zExpPtr' and
+ * `zFracPtr', respectively.
+ */
+void
+__normalizeFloat32Subnormal(uint aFrac,
+                            out int zExpPtr,
+                            out uint zFracPtr)
+{
+   int shiftCount;
+
+   shiftCount = __countLeadingZeros32(aFrac) - 8;
+   zFracPtr = aFrac << shiftCount;
+   zExpPtr = 1 - shiftCount;
+}
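+
+/* e.g. the smallest subnormal, aFrac = 0x1u, has 31 leading zeros, giving
+ * shiftCount = 23: zFracPtr becomes 0x00800000u (the implicit-bit position)
+ * and zExpPtr becomes -22, which still encodes 1.0 * 2^(-22 - 127) = 2^-149.
+ */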
+
+/* Returns the result of multiplying the single-precision floating-point
+ * values `a' and `b'. The operation is performed according to the IEEE
+ * Standard for Floating-Point Arithmetic.
+ */
+uint
+__fmul32(uint a, uint b)
+{
+   uint zFrac0 = 0u;
+   uint zFrac1 = 0u;
+   int zExp;
+
+   uint aFrac = __extractFloat32Frac(a);
+   uint bFrac = __extractFloat32Frac(b);
+   int aExp = __extractFloat32Exp(a);
+   uint aSign = __extractFloat32Sign(a);
+   int bExp = __extractFloat32Exp(b);
+   uint bSign = __extractFloat32Sign(b);
+   uint zSign = aSign ^ bSign;
+   if (aExp == 0xFF) {
+      /* Subnormal values times infinity equals infinity, but other cases can
+       * use the builtin multiply that may flush denorms to 0.
+       */
+      if (aFrac != 0u || ((bExp == 0xFF) && bFrac != 0) || (bExp | bFrac) == 0)
+         return __propagateFloat32NaNInfMul(a, b);
+      return __packFloat32(zSign, 0xFF, 0);
+   }
+   if (bExp == 0xFF) {
+      if (bFrac != 0u || (aExp | aFrac) == 0)
+         return __propagateFloat32NaNInfMul(a, b);
+      return __packFloat32(zSign, 0xFF, 0u);
+   }
+   if (aExp == 0) {
+      if (aFrac == 0u)
+         return __packFloat32(zSign, 0, 0u);
+      __normalizeFloat32Subnormal(aFrac, aExp, aFrac);
+   }
+   if (bExp == 0) {
+      if (bFrac == 0u)
+         return __packFloat32(zSign, 0, 0u);
+      __normalizeFloat32Subnormal(bFrac, bExp, bFrac);
+   }
+   zExp = aExp + bExp - 0x7F;
+   aFrac = (aFrac | 0x00800000)<<7;
+   bFrac = (bFrac | 0x00800000)<<8;
+   umulExtended(aFrac, bFrac, zFrac0, zFrac1);
+   zFrac0 |= uint(zFrac1 != 0);
+   if (0 < int(zFrac0 << 1)) {
+      zFrac0 <<= 1;
+      --zExp;
+   }
+   return __roundAndPackFloat32(zSign, zExp, zFrac0);
+}
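+
+/* e.g. 1.5 * 2.0 (0x3FC00000u * 0x40000000u): zExp = 0x7F + 0x80 - 0x7F =
+ * 0x80, the shifted significands are 0x60000000 and 0x80000000, and
+ * umulExtended() produces a high word of 0x30000000 (low word 0). The
+ * normalize step shifts that to 0x60000000 and drops zExp to 0x7F, and
+ * __roundAndPackFloat32() then packs it to 0x40400000 = 3.0.
+ */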
+
+uint
+__ffma32(uint a, uint b, uint c)
+{
+   return __fadd32(__fmul32(a, b), c);
+}
+
+uint
+__fmin32(uint a, uint b)
+{
+   /* This weird layout matters. Doing the "obvious" thing results in extra
+    * flow control being inserted to implement the short-circuit evaluation
+    * rules. Flow control is bad!
+    */
+   bool b_nan = __is_nan(b);
+   bool a_lt_b = __flt32_nonnan_minmax(a, b);
+   bool a_nan = __is_nan(a);
+
+   return (b_nan || a_lt_b) && !a_nan ? a : b;
+}
+
+uint
+__fmax32(uint a, uint b)
+{
+   /* This weird layout matters. Doing the "obvious" thing results in extra
+    * flow control being inserted to implement the short-circuit evaluation
+    * rules. Flow control is bad!
+    */
+   bool b_nan = __is_nan(b);
+   bool a_lt_b = __flt32_nonnan_minmax(a, b);
+   bool a_nan = __is_nan(a);
+
+   return (!b_nan && a_lt_b) || a_nan ? b : a;
+}
+
diff --git a/src/compiler/meson.build b/src/compiler/meson.build
index 92e72a44f9a..d2345865989 100644
--- a/src/compiler/meson.build
+++ b/src/compiler/meson.build
@@ -25,6 +25,7 @@ builtin_types_c = custom_target(
 
 float64_glsl_file = files('glsl/float64.glsl')
 astc_decoder_glsl_file = files('glsl/astc_decoder.glsl')
+float32_glsl_file = files('glsl/float32.glsl')
 
 files_libcompiler = files(
   'glsl_types.c',
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 8ee22cc1e74..d8784412ffd 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -166,6 +166,7 @@ else
    'nir_lower_explicit_io.c',
    'nir_lower_fb_read.c',
    'nir_lower_flatshade.c',
+   'nir_lower_floats.c',
    'nir_lower_flrp.c',
    'nir_lower_fp16_conv.c',
    'nir_lower_fragcoord_wtrans.c',
@@ -326,6 +327,7 @@ else
    'nir_serialize.h',
    'nir_shader_bisect.c',
    'nir_shader_compiler_options.h',
+   'nir_softfloat.h',
    'nir_split_64bit_vec3_and_vec4.c',
    'nir_split_conversions.c',
    'nir_split_per_member_structs.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index f3ba8222c65..f2df8f5e57d 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -6063,6 +6063,7 @@ bool nir_lower_int64_float_conversions(nir_shader *shader);
 nir_lower_doubles_options nir_lower_doubles_op_to_options_mask(nir_op opcode);
 bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64,
                        nir_lower_doubles_options options);
+bool nir_lower_floats(nir_shader *shader, const nir_shader *softfp32);
 bool nir_lower_pack(nir_shader *shader);
 
 nir_intrinsic_instr *nir_get_io_intrinsic(nir_instr *instr, nir_variable_mode modes,
diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c
index 134c1f66713..12df030de9c 100644
--- a/src/compiler/nir/nir_lower_double_ops.c
+++ b/src/compiler/nir/nir_lower_double_ops.c
@@ -24,6 +24,7 @@
 
 #include "nir.h"
 #include "nir_builder.h"
+#include "nir_softfloat.h"
 
 #include <float.h>
 #include <math.h>
@@ -692,37 +693,7 @@ lower_doubles_instr_to_soft(nir_builder *b, nir_alu_instr *instr,
       assert(func);
    }
 
-   nir_def *params[4] = {
-      NULL,
-   };
-
-   nir_variable *ret_tmp =
-      nir_local_variable_create(b->impl, return_type, "return_tmp");
-   nir_deref_instr *ret_deref = nir_build_deref_var(b, ret_tmp);
-   params[0] = &ret_deref->def;
-
-   assert(nir_op_infos[instr->op].num_inputs + 1 == func->num_params);
-   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
-      nir_alu_type n_type =
-         nir_alu_type_get_base_type(nir_op_infos[instr->op].input_types[i]);
-      /* Add bitsize */
-      n_type = n_type | instr->src[0].src.ssa->bit_size;
-
-      const struct glsl_type *param_type =
-         glsl_scalar_type(nir_get_glsl_base_type_for_nir_type(n_type));
-
-      nir_variable *param =
-         nir_local_variable_create(b->impl, param_type, "param");
-      nir_deref_instr *param_deref = nir_build_deref_var(b, param);
-      nir_store_deref(b, param_deref, nir_mov_alu(b, instr->src[i], 1), ~0);
-
-      assert(i + 1 < ARRAY_SIZE(params));
-      params[i + 1] = &param_deref->def;
-   }
-
-   nir_inline_function_impl(b, func->impl, params, NULL);
-
-   return nir_load_deref(b, ret_deref);
+   return nir_lower_softfloat_func(b, instr, func, return_type);
 }
 
 nir_lower_doubles_options
diff --git a/src/compiler/nir/nir_lower_floats.c b/src/compiler/nir/nir_lower_floats.c
new file mode 100644
index 00000000000..99cc40d578f
--- /dev/null
+++ b/src/compiler/nir/nir_lower_floats.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ * Copyright © 2025 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_softfloat.h"
+
+static nir_def *
+lower_float_instr_to_soft(nir_builder *b, nir_instr *instr,
+                          void *data)
+{
+   const char *mangled_name;
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   const struct glsl_type *return_type = glsl_uint_type();
+   const nir_shader *softfp32 = data;
+
+   switch (alu->op) {
+   case nir_op_fabs:
+      mangled_name = "__fabs32(u1;";
+      break;
+   case nir_op_fneg:
+      mangled_name = "__fneg32(u1;";
+      break;
+   case nir_op_fsign:
+      mangled_name = "__fsign32(u1;";
+      break;
+   case nir_op_feq:
+      mangled_name = "__feq32(u1;u1;";
+      return_type = glsl_bool_type();
+      break;
+   case nir_op_fneu:
+      mangled_name = "__fneu32(u1;u1;";
+      return_type = glsl_bool_type();
+      break;
+   case nir_op_flt:
+      mangled_name = "__flt32(u1;u1;";
+      return_type = glsl_bool_type();
+      break;
+   case nir_op_fge:
+      mangled_name = "__fge32(u1;u1;";
+      return_type = glsl_bool_type();
+      break;
+   case nir_op_fmin:
+      mangled_name = "__fmin32(u1;u1;";
+      break;
+   case nir_op_fmax:
+      mangled_name = "__fmax32(u1;u1;";
+      break;
+   case nir_op_fadd:
+      mangled_name = "__fadd32(u1;u1;";
+      break;
+   case nir_op_fmul:
+      mangled_name = "__fmul32(u1;u1;";
+      break;
+   case nir_op_ffma:
+      mangled_name = "__ffma32(u1;u1;u1;";
+      break;
+   case nir_op_fsat:
+      mangled_name = "__fsat32(u1;";
+      break;
+   default:
+      return NULL;
+   }
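+
+   /* The names above use the GLSL IR linker mangling: "__fadd32(u1;u1;" is
+    * __fadd32 taking two scalar uints, since float32.glsl passes every f32
+    * operand around as its uint bit pattern.
+    */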
+
+   /* Some of the implementations use floating-point primitives in a way
+    * where rounding mode and denorm mode do not matter, for example to
+    * propagate NaNs. By inserting everything before the instruction we avoid
+    * iterating over the inlined instructions again and calling the lowering
+    * on them, avoiding infinite loops.
+    */
+   b->cursor = nir_before_instr(instr);
+
+   nir_function *func = nir_shader_get_function_for_name(softfp32, mangled_name);
+
+   if (!func || !func->impl) {
+      fprintf(stderr, "Cannot find function \"%s\"\n", mangled_name);
+      assert(func);
+   }
+
+   return nir_lower_softfloat_func(b, alu, func, return_type);
+}
+
+static bool
+should_lower_float_instr(const nir_instr *instr, const void *_data)
+{
+   if (instr->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   return alu->src[0].src.ssa->bit_size == 32;
+}
+
+static bool
+nir_lower_floats_impl(nir_function_impl *impl,
+                      const nir_shader *softfp32)
+{
+   bool progress =
+      nir_function_impl_lower_instructions(impl,
+                                           should_lower_float_instr,
+                                           lower_float_instr_to_soft,
+                                           (void *)softfp32);
+
+   if (progress) {
+      /* Indices are completely messed up now */
+      nir_index_ssa_defs(impl);
+
+      nir_progress(true, impl, nir_metadata_none);
+
+      /* And we have deref casts we need to clean up thanks to function
+       * inlining.
+       */
+      nir_opt_deref_impl(impl);
+   } else
+      nir_progress(progress, impl, nir_metadata_control_flow);
+
+   return progress;
+}
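+
+/* A hypothetical usage sketch (mirroring how a softfp64 library shader is
+ * handed to nir_lower_doubles()): the driver compiles float32.glsl into a
+ * library nir_shader once, then runs
+ *
+ *    NIR_PASS(progress, nir, nir_lower_floats, softfp32);
+ *
+ * on shaders that require exact f32 denorm handling.
+ */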
+
+/* Some implementations do not support preserving denorms for
+ * single-precision floats. This lowers the affected operations to softfloat
+ * when denorms are forced on.
+ */
+bool
+nir_lower_floats(nir_shader *shader,
+                 const nir_shader *softfp32)
+{
+   bool progress = false;
+
+   nir_foreach_function_impl(impl, shader) {
+      progress |= nir_lower_floats_impl(impl, softfp32);
+   }
+
+   return progress;
+}
diff --git a/src/compiler/nir/nir_softfloat.h b/src/compiler/nir/nir_softfloat.h
new file mode 100644
index 00000000000..64f9f07dbe0
--- /dev/null
+++ b/src/compiler/nir/nir_softfloat.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ * Copyright © 2025 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+
+static inline nir_def *
+nir_lower_softfloat_func(nir_builder *b,
+                         nir_alu_instr *instr,
+                         nir_function *softfloat_func,
+                         const struct glsl_type *return_type)
+{
+   nir_def *params[4] = {
+      NULL,
+   };
+
+   nir_variable *ret_tmp =
+      nir_local_variable_create(b->impl, return_type, "return_tmp");
+   nir_deref_instr *ret_deref = nir_build_deref_var(b, ret_tmp);
+   params[0] = &ret_deref->def;
+
+   assert(nir_op_infos[instr->op].num_inputs + 1 == softfloat_func->num_params);
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+      nir_alu_type n_type =
+         nir_alu_type_get_base_type(nir_op_infos[instr->op].input_types[i]);
+      /* Add bitsize */
+      n_type = n_type | instr->src[0].src.ssa->bit_size;
+
+      const struct glsl_type *param_type =
+         glsl_scalar_type(nir_get_glsl_base_type_for_nir_type(n_type));
+
+      nir_variable *param =
+         nir_local_variable_create(b->impl, param_type, "param");
+      nir_deref_instr *param_deref = nir_build_deref_var(b, param);
+      nir_store_deref(b, param_deref, nir_mov_alu(b, instr->src[i], 1), ~0);
+
+      assert(i + 1 < ARRAY_SIZE(params));
+      params[i + 1] = &param_deref->def;
+   }
+
+   nir_inline_function_impl(b, softfloat_func->impl, params, NULL);
+
+   return nir_load_deref(b, ret_deref);
+}