gallivm: use llvm intrinsics for 16-bit round/trunc/roundeven

Otherwise the inf translations don't seem to work, and the VK CTS
fails.

Fixes VK CTS dEQP-VK.spirv_assembly.instruction.graphics.float16.arithmetic*

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11816>
This commit is contained in:
Dave Airlie 2021-09-07 11:17:39 +10:00 committed by Marge Bot
parent 2277386565
commit 0d3b285360
2 changed files with 20 additions and 1 deletion

View file

@@ -2030,6 +2030,12 @@ lp_build_trunc(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));
if (type.width == 16) {
char intrinsic[64];
lp_format_intrinsic(intrinsic, 64, "llvm.trunc", bld->vec_type);
return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
}
if (arch_rounding_available(type)) {
return lp_build_round_arch(bld, a, LP_BUILD_ROUND_TRUNCATE);
}
@@ -2083,6 +2089,12 @@ lp_build_round(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));
if (type.width == 16) {
char intrinsic[64];
lp_format_intrinsic(intrinsic, 64, "llvm.round", bld->vec_type);
return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
}
if (arch_rounding_available(type)) {
return lp_build_round_arch(bld, a, LP_BUILD_ROUND_NEAREST);
}

View file

@@ -32,6 +32,7 @@
#include "lp_bld_logic.h"
#include "lp_bld_quad.h"
#include "lp_bld_flow.h"
#include "lp_bld_intr.h"
#include "lp_bld_struct.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
@@ -798,7 +799,13 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
case nir_op_fround_even:
result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
if (src_bit_size[0] == 16) {
struct lp_build_context *bld = get_flt_bld(bld_base, 16);
char intrinsic[64];
lp_format_intrinsic(intrinsic, 64, "llvm.roundeven", bld->vec_type);
result = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, src[0]);
} else
result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
case nir_op_frsq:
result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);