# Copyright © 2024 Intel Corporation
# SPDX-License-Identifier: MIT

"""Generator script for the ``brw_nir_lower_fsign`` NIR algebraic pass.

When run as a script, prints an ``#include "brw_nir.h"`` line followed by the
rendered ``nir_algebraic.AlgebraicPass`` built from ``lower_fsign``.
"""

import argparse
import sys
from math import pi

# Pattern variables used in the search/replace expressions below.
a = 'a'
b = 'b'

# Each entry is a (search, replace) pair of expression tuples.
lower_fsign = [
    # This matches the behavior of the old optimization in brw_fs_nir.cpp, but
    # it has some problems.
    #
    # The fmul version passes Vulkan float_controls2 CTS a little bit by
    # luck. The use of fne means that the false path (i.e., fsign(X) == 0) is
    # only taken when X is zero. For OpenCL, this path should also be taken
    # when X is NaN. This can be handled by using 'fabs(X) > 0', but this
    # fails float_controls2 CTS when the other multiplication operand is NaN.
    #
    # This optimization is additionally problematic when fsign(X) is zero and
    # the other multiplication operand is Inf. This will result in 0, but it
    # should result in NaN. This does not seem to be tested by the CTS.
    #
    # NOTE: fcsel opcodes are currently limited to float32 in NIR.
    (
        ('fmul@32', ('fsign(is_used_once)', a), b),
        ('fcsel',
         a,
         ('ixor', ('iand', a, 0x80000000), b),
         ('iand', a, 0x80000000)),
    ),
    (
        ('fmul@16', ('fsign(is_used_once)', a), b),
        ('bcsel',
         ('fneu', a, 0),
         ('ixor', ('iand', a, 0x8000), b),
         ('iand', a, 0x8000)),
    ),

    # This is 99.99% strictly correct for OpenCL. It will provide correctly
    # signed zero for ±0 inputs, and it will provide zero for NaN inputs. The
    # only slight deviation is that it can provide -0 for some NaN inputs.
    (
        ('fsign@32', a),
        ('fcsel_gt',
         ('fabs', a),
         ('ior', ('iand', a, 0x80000000), 0x3f800000),
         ('iand', a, 0x80000000)),
    ),
    (
        ('fsign@16', a),
        ('bcsel',
         ('!flt', 0, ('fabs', a)),
         ('ior', ('iand', a, 0x8000), 0x3c00),
         ('iand', a, 0x8000)),
    ),
]


def main():
    """Parse the command line, extend ``sys.path``, and emit the pass.

    The required ``-p`` / ``--import-path`` option names the directory that is
    prepended to ``sys.path`` so that ``run()`` can import ``nir_algebraic``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--import-path', required=True)
    args = parser.parse_args()
    sys.path.insert(0, args.import_path)
    run()


def run():
    """Print the include line and the rendered ``brw_nir_lower_fsign`` pass.

    ``nir_algebraic`` is imported here rather than at module level because
    ``main()`` adds its location to ``sys.path`` only just before calling
    this function.
    """
    import nir_algebraic  # pylint: disable=import-error

    print('#include "brw_nir.h"')
    print(nir_algebraic.AlgebraicPass("brw_nir_lower_fsign",
                                      lower_fsign).render())


if __name__ == '__main__':
    main()