From cc82f80dcbdd7e6be2445e73dfa73c31970e5b35 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Date: Fri, 12 Jul 2024 15:55:10 -0500
Subject: [PATCH] nak/nir: Split 64-bit conversions pre-Volta

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30281>
---
 src/nouveau/compiler/meson.build              |  1 +
 src/nouveau/compiler/nak_nir.c                |  3 +
 .../nak_nir_split_64bit_conversions.c         | 95 +++++++++++++++++++
 src/nouveau/compiler/nak_private.h            |  1 +
 4 files changed, 100 insertions(+)
 create mode 100644 src/nouveau/compiler/nak_nir_split_64bit_conversions.c

diff --git a/src/nouveau/compiler/meson.build b/src/nouveau/compiler/meson.build
index 43d0d9fd6b7..a049cb526b7 100644
--- a/src/nouveau/compiler/meson.build
+++ b/src/nouveau/compiler/meson.build
@@ -62,6 +62,7 @@ libnak_c_files = files(
   'nak_nir_lower_scan_reduce.c',
   'nak_nir_lower_tex.c',
   'nak_nir_lower_vtg_io.c',
+  'nak_nir_split_64bit_conversions.c',
   'nak_memstream.c',
 )
 
diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c
index d25ac9cd384..4e5f827b282 100644
--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@@ -1007,6 +1007,9 @@ nak_postprocess_nir(nir_shader *nir,
       }
    } while (progress);
 
+   if (nak->sm < 70)
+      OPT(nir, nak_nir_split_64bit_conversions);
+
    nir_convert_to_lcssa(nir, true, true);
    nir_divergence_analysis(nir);
 
diff --git a/src/nouveau/compiler/nak_nir_split_64bit_conversions.c b/src/nouveau/compiler/nak_nir_split_64bit_conversions.c
new file mode 100644
index 00000000000..812b1c82089
--- /dev/null
+++ b/src/nouveau/compiler/nak_nir_split_64bit_conversions.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright © 2024 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* Adapted from intel_nir_lower_conversions.c */
+
+#include "nak_private.h"
+#include "nir_builder.h"
+
+static bool
+split_64bit_conversion(nir_builder *b, nir_instr *instr, UNUSED void *_data)
+{
+   if (instr->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+   if (!nir_op_infos[alu->op].is_conversion)
+      return false;
+
+   unsigned src_bit_size = nir_src_bit_size(alu->src[0].src);
+   nir_alu_type src_type = nir_op_infos[alu->op].input_types[0];
+   nir_alu_type src_full_type = (nir_alu_type) (src_type | src_bit_size);
+
+   unsigned dst_bit_size = alu->def.bit_size;
+   nir_alu_type dst_full_type = nir_op_infos[alu->op].output_type;
+   assert(nir_alu_type_get_type_size(dst_full_type) == dst_bit_size);
+   nir_alu_type dst_type = nir_alu_type_get_base_type(dst_full_type);
+
+   /* Only conversions between <32-bit and >32-bit types need splitting */
+   if ((src_bit_size <= 32 && dst_bit_size <= 32) ||
+       (src_bit_size >= 32 && dst_bit_size >= 32))
+      return false;
+
+   nir_alu_type tmp_type;
+   if ((src_full_type == nir_type_float16 && dst_bit_size == 64) ||
+       (src_bit_size == 64 && dst_full_type == nir_type_float16)) {
+      /* It is important that the intermediate conversion happens through a
+       * 32-bit float type so we don't lose range when we convert to/from
+       * a 64-bit integer.
+       */
+      tmp_type = nir_type_float32;
+   } else {
+      /* For fp64 to integer conversions, using an integer intermediate type
+       * ensures that rounding happens as part of the first conversion,
+       * avoiding any chance of rtne rounding happening before the conversion
+       * to integer (which is expected to round towards zero).
+       *
+       * NOTE: NVIDIA hardware saturates conversions by default and the second
+       * conversion will not saturate in this case.  However, GLSL makes OOB
+       * values in conversions undefined.
+       *
+       * For all other conversions, the conversion from int to int is either
+       * lossless or just as lossy as the final conversion.
+       */
+      tmp_type = dst_type | 32;
+   }
+
+   b->cursor = nir_before_instr(&alu->instr);
+   nir_def *src = nir_ssa_for_alu_src(b, alu, 0);
+   nir_def *tmp = nir_type_convert(b, src, src_type, tmp_type,
+                                   nir_rounding_mode_undef);
+   nir_def *res = nir_type_convert(b, tmp, tmp_type, dst_full_type,
+                                   nir_rounding_mode_undef);
+   nir_def_replace(&alu->def, res);
+
+   return true;
+}
+
+bool
+nak_nir_split_64bit_conversions(nir_shader *nir)
+{
+   return nir_shader_instructions_pass(nir, split_64bit_conversion,
+                                       nir_metadata_control_flow,
+                                       NULL); /* callback ignores its data */
+}
diff --git a/src/nouveau/compiler/nak_private.h b/src/nouveau/compiler/nak_private.h
index f6d034b1b0b..bbb23b9098c 100644
--- a/src/nouveau/compiler/nak_private.h
+++ b/src/nouveau/compiler/nak_private.h
@@ -218,6 +218,7 @@ enum nak_fs_out {
 
 #define NAK_FS_OUT_COLOR(n) (NAK_FS_OUT_COLOR0 + (n) * 16)
 
+bool nak_nir_split_64bit_conversions(nir_shader *nir);
 bool nak_nir_lower_non_uniform_ldcx(nir_shader *nir);
 bool nak_nir_add_barriers(nir_shader *nir, const struct nak_compiler *nak);
 bool nak_nir_lower_cf(nir_shader *nir);