gallivm: use llvm intrinsics for 16-bit round/trunc/roundeven

Otherwise the inf translations don't seem to work, and the VK CTS fails.

Fixes VK CTS dEQP-VK.spirv_assembly.instruction.graphics.float16.arithmetic*

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11816>
2026-05-06 07:18:17 +02:00 · 2021-09-07 11:17:39 +10:00 · 2021-09-07 11:17:39 +10:00 · 0d3b285360
commit 0d3b285360
parent 2277386565
2 changed files with 20 additions and 1 deletion
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -2030,6 +2030,12 @@ lp_build_trunc(struct lp_build_context *bld,
   assert(type.floating);
   assert(lp_check_value(type, a));

+   if (type.width == 16) {
+      char intrinsic[64];
+      lp_format_intrinsic(intrinsic, 64, "llvm.trunc", bld->vec_type);
+      return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
+   }
+
   if (arch_rounding_available(type)) {
      return lp_build_round_arch(bld, a, LP_BUILD_ROUND_TRUNCATE);
   }
@@ -2083,6 +2089,12 @@ lp_build_round(struct lp_build_context *bld,
   assert(type.floating);
   assert(lp_check_value(type, a));

+   if (type.width == 16) {
+      char intrinsic[64];
+      lp_format_intrinsic(intrinsic, 64, "llvm.round", bld->vec_type);
+      return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
+   }
+
   if (arch_rounding_available(type)) {
      return lp_build_round_arch(bld, a, LP_BUILD_ROUND_NEAREST);
   }
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
@@ -32,6 +32,7 @@
 #include "lp_bld_logic.h"
 #include "lp_bld_quad.h"
 #include "lp_bld_flow.h"
+#include "lp_bld_intr.h"
 #include "lp_bld_struct.h"
 #include "lp_bld_debug.h"
 #include "lp_bld_printf.h"
@@ -798,7 +799,13 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
      result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fround_even:
-      result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
+      if (src_bit_size[0] == 16) {
+	 struct lp_build_context *bld = get_flt_bld(bld_base, 16);
+	 char intrinsic[64];
+	 lp_format_intrinsic(intrinsic, 64, "llvm.roundeven", bld->vec_type);
+	 result = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, src[0]);
+      } else
+	 result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_frsq:
      result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);