From cc82f80dcbdd7e6be2445e73dfa73c31970e5b35 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Fri, 12 Jul 2024 15:55:10 -0500 Subject: [PATCH] nak/nir: Split 64-bit conversions pre-Volta Part-of: --- src/nouveau/compiler/meson.build | 1 + src/nouveau/compiler/nak_nir.c | 3 + .../nak_nir_split_64bit_conversions.c | 95 +++++++++++++++++++ src/nouveau/compiler/nak_private.h | 1 + 4 files changed, 100 insertions(+) create mode 100644 src/nouveau/compiler/nak_nir_split_64bit_conversions.c diff --git a/src/nouveau/compiler/meson.build b/src/nouveau/compiler/meson.build index 43d0d9fd6b7..a049cb526b7 100644 --- a/src/nouveau/compiler/meson.build +++ b/src/nouveau/compiler/meson.build @@ -62,6 +62,7 @@ libnak_c_files = files( 'nak_nir_lower_scan_reduce.c', 'nak_nir_lower_tex.c', 'nak_nir_lower_vtg_io.c', + 'nak_nir_split_64bit_conversions.c', 'nak_memstream.c', ) diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index d25ac9cd384..4e5f827b282 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -1007,6 +1007,9 @@ nak_postprocess_nir(nir_shader *nir, } } while (progress); + if (nak->sm < 70) + OPT(nir, nak_nir_split_64bit_conversions); + nir_convert_to_lcssa(nir, true, true); nir_divergence_analysis(nir); diff --git a/src/nouveau/compiler/nak_nir_split_64bit_conversions.c b/src/nouveau/compiler/nak_nir_split_64bit_conversions.c new file mode 100644 index 00000000000..812b1c82089 --- /dev/null +++ b/src/nouveau/compiler/nak_nir_split_64bit_conversions.c @@ -0,0 +1,95 @@ +/* + * Copyright © 2024 Collabora, Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +/* Adapted from intel_nir_lower_conversions.c */ + +#include "nak_private.h" +#include "nir_builder.h" + +static bool +split_64bit_conversion(nir_builder *b, nir_instr *instr, UNUSED void *_data) +{ + if (instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + + if (!nir_op_infos[alu->op].is_conversion) + return false; + + unsigned src_bit_size = nir_src_bit_size(alu->src[0].src); + nir_alu_type src_type = nir_op_infos[alu->op].input_types[0]; + nir_alu_type src_full_type = (nir_alu_type) (src_type | src_bit_size); + + unsigned dst_bit_size = alu->def.bit_size; + nir_alu_type dst_full_type = nir_op_infos[alu->op].output_type; + assert(nir_alu_type_get_type_size(dst_full_type) == dst_bit_size); + nir_alu_type dst_type = nir_alu_type_get_base_type(dst_full_type); + + /* Split only when one size is below 32 bits and the other is above 32 */ + if ((src_bit_size <= 32 && dst_bit_size <= 32) || + (src_bit_size >= 32 && dst_bit_size >= 32)) + return false; + + nir_alu_type tmp_type; + if ((src_full_type == nir_type_float16 && dst_bit_size == 64) || + (src_bit_size == 64 && dst_full_type == nir_type_float16)) { + /* It is important that the intermediate conversion happens through a + * 32-bit float type so we don't lose range when we convert to/from + * a 64-bit integer. + */ + tmp_type = nir_type_float32; + } else { + /* For fp64 to integer conversions, using an integer intermediate type + * ensures that rounding happens as part of the first conversion, + * avoiding any chance of rtne rounding happening before the conversion + * to integer (which is expected to round towards zero). + * + * NOTE: NVIDIA hardware saturates conversions by default and the second + * conversion will not saturate in this case. However, GLSL makes OOB + * values in conversions undefined. + * + * For all other conversions, the conversion from int to int is either + * lossless or just as lossy as the final conversion. 
+ */ + tmp_type = dst_type | 32; + } + + b->cursor = nir_before_instr(&alu->instr); + nir_def *src = nir_ssa_for_alu_src(b, alu, 0); + nir_def *tmp = nir_type_convert(b, src, src_type, tmp_type, + nir_rounding_mode_undef); + nir_def *res = nir_type_convert(b, tmp, tmp_type, dst_full_type, + nir_rounding_mode_undef); + nir_def_replace(&alu->def, res); + + return true; +} + +bool +nak_nir_split_64bit_conversions(nir_shader *nir) +{ + return nir_shader_instructions_pass(nir, split_64bit_conversion, + nir_metadata_control_flow, + NULL); +} diff --git a/src/nouveau/compiler/nak_private.h b/src/nouveau/compiler/nak_private.h index f6d034b1b0b..bbb23b9098c 100644 --- a/src/nouveau/compiler/nak_private.h +++ b/src/nouveau/compiler/nak_private.h @@ -218,6 +218,7 @@ enum nak_fs_out { #define NAK_FS_OUT_COLOR(n) (NAK_FS_OUT_COLOR0 + (n) * 16) +bool nak_nir_split_64bit_conversions(nir_shader *nir); bool nak_nir_lower_non_uniform_ldcx(nir_shader *nir); bool nak_nir_add_barriers(nir_shader *nir, const struct nak_compiler *nak); bool nak_nir_lower_cf(nir_shader *nir);