meson: Require LLVM 8 or newer

This isn't a huge cleanup on its own, but it lets us start assuming
coroutine support, and I would like to unify graphics shader dispatch
to work the same way as compute shader dispatch.

Acked-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35374>
Adam Jackson, 2025-01-30 09:42:39 -05:00; committed by Marge Bot
parent c459be3795
commit 6a28d6707d
10 changed files with 36 additions and 319 deletions


@@ -1736,7 +1736,7 @@ if with_amd_vk or with_gallium_radeonsi
 elif with_clc or llvm_with_orcjit
   _llvm_version = '>= 15.0.0'
 else
-  _llvm_version = '>= 5.0.0'
+  _llvm_version = '>= 8.0.0'
 endif
 _shared_llvm = get_option('shared-llvm') \


@@ -87,54 +87,6 @@
 #define LLVMInsertBasicBlock ILLEGAL_LLVM_FUNCTION
 #define LLVMCreateBuilder ILLEGAL_LLVM_FUNCTION
-
-#if LLVM_VERSION_MAJOR >= 8
-#define GALLIVM_COROUTINES 1
-#else
-#define GALLIVM_COROUTINES 0
-#endif
-
-/* LLVM is transitioning to "opaque pointers", and as such deprecates
- * LLVMBuildGEP, LLVMBuildCall, LLVMBuildLoad, replacing them with
- * LLVMBuildGEP2, LLVMBuildCall2, LLVMBuildLoad2 respectively.
- * These new functions were added in LLVM 8.0; so for LLVM before 8.0 we
- * simply forward to the non-opaque-pointer variants.
- */
-#if LLVM_VERSION_MAJOR < 8
-
-static inline LLVMValueRef
-LLVMBuildGEP2(LLVMBuilderRef B, LLVMTypeRef Ty,
-              LLVMValueRef Pointer, LLVMValueRef *Indices,
-              unsigned NumIndices, const char *Name)
-{
-   return LLVMBuildGEP(B, Pointer, Indices, NumIndices, Name);
-}
-
-static inline LLVMValueRef
-LLVMBuildInBoundsGEP2(LLVMBuilderRef B, LLVMTypeRef Ty,
-                      LLVMValueRef Pointer, LLVMValueRef *Indices,
-                      unsigned NumIndices, const char *Name)
-{
-   return LLVMBuildInBoundsGEP(B, Pointer, Indices, NumIndices, Name);
-}
-
-static inline LLVMValueRef
-LLVMBuildLoad2(LLVMBuilderRef B, LLVMTypeRef Ty,
-               LLVMValueRef PointerVal, const char *Name)
-{
-   LLVMValueRef val = LLVMBuildLoad(B, PointerVal, Name);
-   return LLVMTypeOf(val) == Ty ? val : LLVMBuildBitCast(B, val, Ty, Name);
-}
-
-static inline LLVMValueRef
-LLVMBuildCall2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn,
-               LLVMValueRef *Args, unsigned NumArgs,
-               const char *Name)
-{
-   return LLVMBuildCall(B, Fn, Args, NumArgs, Name);
-}
-
-#endif /* LLVM_VERSION_MAJOR < 8 */
-
 typedef struct lp_context_ref {
 #if GALLIVM_USE_ORCJIT
    LLVMOrcThreadSafeContextRef ref;
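Per the removed comment, the explicitly-typed builder entry points (LLVMBuildGEP2, LLVMBuildLoad2, LLVMBuildCall2) exist since LLVM 8.0, so with LLVM 8 as the floor gallivm can call them directly and the forwarding shims can go. A minimal sketch of direct use; every parameter name here is hypothetical, not from the patch:

    #include <llvm-c/Core.h>

    /* Build "load an element, then call fn on it" with the explicitly-typed
     * (opaque-pointer-ready) API that LLVM 8 guarantees. Assumes elem_type
     * matches what ptr points at, and fn_type matches fn. */
    static LLVMValueRef
    load_then_call(LLVMContextRef ctx, LLVMBuilderRef builder,
                   LLVMTypeRef elem_type, LLVMValueRef ptr,
                   LLVMTypeRef fn_type, LLVMValueRef fn)
    {
       LLVMValueRef idx = LLVMConstInt(LLVMInt32TypeInContext(ctx), 0, 0);
       LLVMValueRef gep = LLVMBuildGEP2(builder, elem_type, ptr, &idx, 1, "gep");
       LLVMValueRef val = LLVMBuildLoad2(builder, elem_type, gep, "val");
       return LLVMBuildCall2(builder, fn_type, fn, &val, 1, "call");
    }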


@@ -427,33 +427,10 @@ lp_build_add(struct lp_build_context *bld,
       return bld->one;
 
    if (!type.floating && !type.fixed) {
-      if (LLVM_VERSION_MAJOR >= 8) {
-         char intrin[32];
-         intrinsic = type.sign ? "llvm.sadd.sat" : "llvm.uadd.sat";
-         lp_format_intrinsic(intrin, sizeof intrin, intrinsic, bld->vec_type);
-         return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b);
-      }
-      if (type.width * type.length == 128) {
-         if (util_get_cpu_caps()->has_sse2) {
-            if (type.width == 8)
-               intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
-            if (type.width == 16)
-               intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
-         } else if (util_get_cpu_caps()->has_altivec) {
-            if (type.width == 8)
-               intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs";
-            if (type.width == 16)
-               intrinsic = type.sign ? "llvm.ppc.altivec.vaddshs" : "llvm.ppc.altivec.vadduhs";
-         }
-      }
-      if (type.width * type.length == 256) {
-         if (util_get_cpu_caps()->has_avx2) {
-            if (type.width == 8)
-               intrinsic = type.sign ? "llvm.x86.avx2.padds.b" : "llvm.x86.avx2.paddus.b";
-            if (type.width == 16)
-               intrinsic = type.sign ? "llvm.x86.avx2.padds.w" : "llvm.x86.avx2.paddus.w";
-         }
-      }
+      char intrin[32];
+      intrinsic = type.sign ? "llvm.sadd.sat" : "llvm.uadd.sat";
+      lp_format_intrinsic(intrin, sizeof intrin, intrinsic, bld->vec_type);
+      return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b);
    }
 
    if (intrinsic)
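The surviving path relies on the target-independent llvm.sadd.sat / llvm.uadd.sat intrinsics, which the removed version check shows arrived in LLVM 8; lp_format_intrinsic() appends the vector suffix, so a saturating add on <8 x i16> becomes "llvm.sadd.sat.v8i16". For reference, a scalar sketch (not part of the patch) of what the signed form computes per lane:

    #include <stdint.h>

    /* clamp the sum to the int16_t range instead of wrapping */
    static inline int16_t
    sadd_sat_i16(int16_t a, int16_t b)
    {
       int32_t sum = (int32_t)a + (int32_t)b;   /* widen so the add cannot wrap */
       if (sum > INT16_MAX)
          return INT16_MAX;
       if (sum < INT16_MIN)
          return INT16_MIN;
       return (int16_t)sum;
    }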
@@ -753,33 +730,10 @@ lp_build_sub(struct lp_build_context *bld,
       return bld->zero;
 
    if (!type.floating && !type.fixed) {
-      if (LLVM_VERSION_MAJOR >= 8) {
-         char intrin[32];
-         intrinsic = type.sign ? "llvm.ssub.sat" : "llvm.usub.sat";
-         lp_format_intrinsic(intrin, sizeof intrin, intrinsic, bld->vec_type);
-         return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b);
-      }
-      if (type.width * type.length == 128) {
-         if (util_get_cpu_caps()->has_sse2) {
-            if (type.width == 8)
-               intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
-            if (type.width == 16)
-               intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
-         } else if (util_get_cpu_caps()->has_altivec) {
-            if (type.width == 8)
-               intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs";
-            if (type.width == 16)
-               intrinsic = type.sign ? "llvm.ppc.altivec.vsubshs" : "llvm.ppc.altivec.vsubuhs";
-         }
-      }
-      if (type.width * type.length == 256) {
-         if (util_get_cpu_caps()->has_avx2) {
-            if (type.width == 8)
-               intrinsic = type.sign ? "llvm.x86.avx2.psubs.b" : "llvm.x86.avx2.psubus.b";
-            if (type.width == 16)
-               intrinsic = type.sign ? "llvm.x86.avx2.psubs.w" : "llvm.x86.avx2.psubus.w";
-         }
-      }
+      char intrin[32];
+      intrinsic = type.sign ? "llvm.ssub.sat" : "llvm.usub.sat";
+      lp_format_intrinsic(intrin, sizeof intrin, intrinsic, bld->vec_type);
+      return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b);
    }
 
    if (intrinsic)
@@ -1007,128 +961,12 @@ lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
                          LLVMValueRef b,
                          LLVMValueRef *res_hi)
 {
    struct gallivm_state *gallivm = bld->gallivm;
    LLVMBuilderRef builder = gallivm->builder;
 
    assert(bld->type.width == 32);
    assert(bld->type.floating == 0);
    assert(bld->type.fixed == 0);
    assert(bld->type.norm == 0);
-
-   /*
-    * XXX: for some reason, with zext/zext/mul/trunc the code llvm produces
-    * for x86 simd is atrocious (even if the high bits weren't required),
-    * trying to handle real 64bit inputs (which of course can't happen due
-    * to using 64bit umul with 32bit numbers zero-extended to 64bit, but
-    * apparently llvm does not recognize this widening mul). This includes 6
-    * (instead of 2) pmuludq plus extra adds and shifts
-    * The same story applies to signed mul, albeit fixing this requires sse41.
-    * https://llvm.org/bugs/show_bug.cgi?id=30845
-    * So, whip up our own code, albeit only for length 4 and 8 (which
-    * should be good enough)...
-    * FIXME: For llvm >= 7.0 we should match the autoupgrade pattern
-    * (bitcast/and/mul/shuffle for unsigned, bitcast/shl/ashr/mul/shuffle
-    * for signed), which the fallback code does not, without this llvm
-    * will likely still produce atrocious code.
-    */
-   if (LLVM_VERSION_MAJOR < 7 &&
-       (bld->type.length == 4 || bld->type.length == 8) &&
-       ((util_get_cpu_caps()->has_sse2 && (bld->type.sign == 0)) ||
-        util_get_cpu_caps()->has_sse4_1)) {
-      const char *intrinsic = NULL;
-      LLVMValueRef aeven, aodd, beven, bodd, muleven, mulodd;
-      LLVMValueRef shuf[LP_MAX_VECTOR_WIDTH / 32], shuf_vec;
-      struct lp_type type_wide = lp_wider_type(bld->type);
-      LLVMTypeRef wider_type = lp_build_vec_type(gallivm, type_wide);
-      unsigned i;
-
-      for (i = 0; i < bld->type.length; i += 2) {
-         shuf[i] = lp_build_const_int32(gallivm, i+1);
-         shuf[i+1] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
-      }
-      shuf_vec = LLVMConstVector(shuf, bld->type.length);
-      aeven = a;
-      beven = b;
-      aodd = LLVMBuildShuffleVector(builder, aeven, bld->undef, shuf_vec, "");
-      bodd = LLVMBuildShuffleVector(builder, beven, bld->undef, shuf_vec, "");
-
-      if (util_get_cpu_caps()->has_avx2 && bld->type.length == 8) {
-         if (bld->type.sign) {
-            intrinsic = "llvm.x86.avx2.pmul.dq";
-         } else {
-            intrinsic = "llvm.x86.avx2.pmulu.dq";
-         }
-         muleven = lp_build_intrinsic_binary(builder, intrinsic,
-                                             wider_type, aeven, beven);
-         mulodd = lp_build_intrinsic_binary(builder, intrinsic,
-                                            wider_type, aodd, bodd);
-      }
-      else {
-         /* for consistent naming look elsewhere... */
-         if (bld->type.sign) {
-            intrinsic = "llvm.x86.sse41.pmuldq";
-         } else {
-            intrinsic = "llvm.x86.sse2.pmulu.dq";
-         }
-         /*
-          * XXX If we only have AVX but not AVX2 this is a pain.
-          * lp_build_intrinsic_binary_anylength() can't handle it
-          * (due to src and dst type not being identical).
-          */
-         if (bld->type.length == 8) {
-            LLVMValueRef aevenlo, aevenhi, bevenlo, bevenhi;
-            LLVMValueRef aoddlo, aoddhi, boddlo, boddhi;
-            LLVMValueRef muleven2[2], mulodd2[2];
-            struct lp_type type_wide_half = type_wide;
-            LLVMTypeRef wtype_half;
-
-            type_wide_half.length = 2;
-            wtype_half = lp_build_vec_type(gallivm, type_wide_half);
-
-            aevenlo = lp_build_extract_range(gallivm, aeven, 0, 4);
-            aevenhi = lp_build_extract_range(gallivm, aeven, 4, 4);
-            bevenlo = lp_build_extract_range(gallivm, beven, 0, 4);
-            bevenhi = lp_build_extract_range(gallivm, beven, 4, 4);
-            aoddlo = lp_build_extract_range(gallivm, aodd, 0, 4);
-            aoddhi = lp_build_extract_range(gallivm, aodd, 4, 4);
-            boddlo = lp_build_extract_range(gallivm, bodd, 0, 4);
-            boddhi = lp_build_extract_range(gallivm, bodd, 4, 4);
-
-            muleven2[0] = lp_build_intrinsic_binary(builder, intrinsic,
-                                                    wtype_half, aevenlo, bevenlo);
-            mulodd2[0] = lp_build_intrinsic_binary(builder, intrinsic,
-                                                   wtype_half, aoddlo, boddlo);
-            muleven2[1] = lp_build_intrinsic_binary(builder, intrinsic,
-                                                    wtype_half, aevenhi, bevenhi);
-            mulodd2[1] = lp_build_intrinsic_binary(builder, intrinsic,
-                                                   wtype_half, aoddhi, boddhi);
-
-            muleven = lp_build_concat(gallivm, muleven2, type_wide_half, 2);
-            mulodd = lp_build_concat(gallivm, mulodd2, type_wide_half, 2);
-         }
-         else {
-            muleven = lp_build_intrinsic_binary(builder, intrinsic,
-                                                wider_type, aeven, beven);
-            mulodd = lp_build_intrinsic_binary(builder, intrinsic,
-                                               wider_type, aodd, bodd);
-         }
-      }
-
-      muleven = LLVMBuildBitCast(builder, muleven, bld->vec_type, "");
-      mulodd = LLVMBuildBitCast(builder, mulodd, bld->vec_type, "");
-
-      for (i = 0; i < bld->type.length; i += 2) {
-         shuf[i] = lp_build_const_int32(gallivm, i + 1);
-         shuf[i+1] = lp_build_const_int32(gallivm, i + 1 + bld->type.length);
-      }
-      shuf_vec = LLVMConstVector(shuf, bld->type.length);
-      *res_hi = LLVMBuildShuffleVector(builder, muleven, mulodd, shuf_vec, "");
-
-      for (i = 0; i < bld->type.length; i += 2) {
-         shuf[i] = lp_build_const_int32(gallivm, i);
-         shuf[i+1] = lp_build_const_int32(gallivm, i + bld->type.length);
-      }
-      shuf_vec = LLVMConstVector(shuf, bld->type.length);
-      return LLVMBuildShuffleVector(builder, muleven, mulodd, shuf_vec, "");
-   }
-   else {
-      return lp_build_mul_32_lohi(bld, a, b, res_hi);
-   }
+   return lp_build_mul_32_lohi(bld, a, b, res_hi);
 }
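The hand-rolled pmuludq/pmuldq path was only ever taken for LLVM older than 7, so it is dead code under the new minimum and the wrapper can forward to the generic lp_build_mul_32_lohi(). For reference, a scalar sketch (not part of the patch) of what the generic zext/mul/shift sequence computes per 32-bit lane in the unsigned case:

    #include <stdint.h>

    /* full 32x32 -> 64 multiply; return the high half, store the low half */
    static inline uint32_t
    mul32_hi(uint32_t a, uint32_t b, uint32_t *lo)
    {
       uint64_t wide = (uint64_t)a * (uint64_t)b;   /* zext + zext + mul */
       *lo = (uint32_t)wide;                        /* trunc */
       return (uint32_t)(wide >> 32);               /* lshr + trunc */
    }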
@@ -1727,27 +1565,6 @@ lp_build_abs(struct lp_build_context *bld,
       return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
    }
 
-   if (type.width*type.length == 128 && util_get_cpu_caps()->has_ssse3 && LLVM_VERSION_MAJOR < 6) {
-      switch(type.width) {
-      case 8:
-         return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
-      case 16:
-         return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a);
-      case 32:
-         return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
-      }
-   }
-   else if (type.width*type.length == 256 && util_get_cpu_caps()->has_avx2 && LLVM_VERSION_MAJOR < 6) {
-      switch(type.width) {
-      case 8:
-         return lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.b", vec_type, a);
-      case 16:
-         return lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.w", vec_type, a);
-      case 32:
-         return lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.d", vec_type, a);
-      }
-   }
    return lp_build_select(bld, lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero),
                           a, LLVMBuildNeg(builder, a, ""));
 }
@@ -1881,7 +1698,7 @@ static bool
 arch_rounding_available(const struct lp_type type)
 {
    if ((util_get_cpu_caps()->has_sse4_1 &&
-        (type.length == 1 || (LLVM_VERSION_MAJOR >= 8 && type.length == 2) ||
+        (type.length == 1 || type.length == 2 ||
         type.width * type.length == 128)) ||
        (util_get_cpu_caps()->has_avx && type.width * type.length == 256) ||
        (util_get_cpu_caps()->has_avx512f && type.width * type.length == 512))


@@ -31,15 +31,6 @@
 #include "lp_bld_intr.h"
 #include "lp_bld_flow.h"
 
-#if LLVM_VERSION_MAJOR < 6
-/* not a wrapper, just lets it compile */
-static LLVMTypeRef LLVMTokenTypeInContext(LLVMContextRef C)
-{
-   assert(0);
-   return LLVMVoidTypeInContext(C);
-}
-#endif
-
 LLVMValueRef lp_build_coro_id(struct gallivm_state *gallivm)
 {
    LLVMValueRef coro_id_args[4];
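LLVMTokenTypeInContext() has been in the LLVM-C API since LLVM 6 (hence the removed guard), so the compile-only stub can go; it exists because coroutine intrinsics such as llvm.coro.id return a token. A sketch of how such a call is built through gallivm's existing lp_build_intrinsic() helper; this is a condensed illustration, not the verbatim body of lp_build_coro_id():

    /* llvm.coro.id takes (align, promise, coroaddr, fnaddrs) and yields the
     * token that the other coro intrinsics consume. */
    LLVMTypeRef i8p = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
    LLVMValueRef coro_id_args[4] = {
       lp_build_const_int32(gallivm, 0),   /* alignment: let LLVM decide */
       LLVMConstNull(i8p),                 /* no promise */
       LLVMConstNull(i8p),                 /* no coroutine address */
       LLVMConstNull(i8p),                 /* no function addresses */
    };
    LLVMValueRef coro_id =
       lp_build_intrinsic(gallivm->builder, "llvm.coro.id",
                          LLVMTokenTypeInContext(gallivm->context),
                          coro_id_args, 4, 0);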


@@ -467,36 +467,27 @@ lp_build_pavgb(struct lp_build_context *bld8,
    LLVMBuilderRef builder = gallivm->builder;
 
    assert(bld8->type.width == 8);
    assert(bld8->type.length == 16 || bld8->type.length == 32);
 
-   if (LLVM_VERSION_MAJOR < 6) {
-      LLVMValueRef intrargs[2];
-      char *intr_name = bld8->type.length == 32 ? "llvm.x86.avx2.pavg.b" :
-                                                  "llvm.x86.sse2.pavg.b";
-      intrargs[0] = v0;
-      intrargs[1] = v1;
-      return lp_build_intrinsic(builder, intr_name,
-                                bld8->vec_type, intrargs, 2, 0);
-   } else {
-      /*
-       * Must match llvm's autoupgrade of pavg.b intrinsic to be useful.
-       * You better hope the backend code manages to detect the pattern, and
-       * the pattern doesn't change there...
-       */
-      struct lp_type type_ext = bld8->type;
-      LLVMTypeRef vec_type_ext;
-      LLVMValueRef res;
-      LLVMValueRef ext_one;
-      type_ext.width = 16;
-      vec_type_ext = lp_build_vec_type(gallivm, type_ext);
-      ext_one = lp_build_const_vec(gallivm, type_ext, 1);
-
-      v0 = LLVMBuildZExt(builder, v0, vec_type_ext, "");
-      v1 = LLVMBuildZExt(builder, v1, vec_type_ext, "");
-      res = LLVMBuildAdd(builder, v0, v1, "");
-      res = LLVMBuildAdd(builder, res, ext_one, "");
-      res = LLVMBuildLShr(builder, res, ext_one, "");
-      res = LLVMBuildTrunc(builder, res, bld8->vec_type, "");
-      return res;
-   }
+   /*
+    * Must match llvm's autoupgrade of pavg.b intrinsic to be useful.
+    * You better hope the backend code manages to detect the pattern, and
+    * the pattern doesn't change there...
+    */
+   struct lp_type type_ext = bld8->type;
+   LLVMTypeRef vec_type_ext;
+   LLVMValueRef res;
+   LLVMValueRef ext_one;
+   type_ext.width = 16;
+   vec_type_ext = lp_build_vec_type(gallivm, type_ext);
+   ext_one = lp_build_const_vec(gallivm, type_ext, 1);
+
+   v0 = LLVMBuildZExt(builder, v0, vec_type_ext, "");
+   v1 = LLVMBuildZExt(builder, v1, vec_type_ext, "");
+   res = LLVMBuildAdd(builder, v0, v1, "");
+   res = LLVMBuildAdd(builder, res, ext_one, "");
+   res = LLVMBuildLShr(builder, res, ext_one, "");
+   res = LLVMBuildTrunc(builder, res, bld8->vec_type, "");
+   return res;
 }
 
 /**

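The surviving path computes a round-half-up byte average in widened arithmetic, which is the pattern LLVM's autoupgrade emits for the retired pavg.b intrinsic. A scalar sketch (not part of the patch) of the per-lane computation:

    #include <stdint.h>

    /* widen to 16 bits so a + b + 1 cannot overflow, then halve */
    static inline uint8_t
    pavgb_scalar(uint8_t a, uint8_t b)
    {
       return (uint8_t)(((uint16_t)a + (uint16_t)b + 1) >> 1);
    }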

@@ -30,10 +30,8 @@
 #include <llvm-c/Analysis.h>
 #if LLVM_VERSION_MAJOR < 17
 #include <llvm-c/Transforms/Scalar.h>
-#if LLVM_VERSION_MAJOR >= 7
 #include <llvm-c/Transforms/Utils.h>
 #endif
-#endif
 #include <llvm-c/BitWriter.h>
 
 #include <llvm/ADT/StringMap.h>


@@ -208,15 +208,9 @@ class DelegatingJITMemoryManager : public BaseMemoryManager {
       virtual void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
         mgr()->registerEHFrames(Addr, LoadAddr, Size);
       }
-#if LLVM_VERSION_MAJOR >= 5
       virtual void deregisterEHFrames() {
         mgr()->deregisterEHFrames();
       }
-#else
-      virtual void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
-        mgr()->deregisterEHFrames(Addr, LoadAddr, Size);
-      }
-#endif
       virtual void *getPointerToNamedFunction(const std::string &Name,
                                               bool AbortOnFailure=true) {
         return mgr()->getPointerToNamedFunction(Name, AbortOnFailure);


@@ -32,24 +32,13 @@
 #include "lp_bld_passmgr.h"
 #include "lp_bld_init.h"
 
-#if LLVM_VERSION_MAJOR >= 15
-#define HAVE_CORO 0
-#define USE_NEW_PASS 1
-#elif LLVM_VERSION_MAJOR >= 8
-#define HAVE_CORO 1
-#define USE_NEW_PASS 0
-#else
-#define HAVE_CORO 0
-#define USE_NEW_PASS 0
-#endif
+#define USE_NEW_PASS (LLVM_VERSION_MAJOR >= 15)
 
 #if USE_NEW_PASS == 1
 #include <llvm-c/Transforms/PassBuilder.h>
-#elif HAVE_CORO == 1
+#else
 #include <llvm-c/Transforms/Scalar.h>
-#if LLVM_VERSION_MAJOR >= 7
 #include <llvm-c/Transforms/Utils.h>
-#endif
 #if LLVM_VERSION_MAJOR <= 8 && (DETECT_ARCH_AARCH64 || DETECT_ARCH_ARM || DETECT_ARCH_S390 || DETECT_ARCH_MIPS64)
 #include <llvm-c/Transforms/IPO.h>
 #endif
@@ -59,9 +48,7 @@
 #if USE_NEW_PASS == 0
 struct lp_passmgr {
    LLVMPassManagerRef passmgr;
-#if HAVE_CORO == 1
    LLVMPassManagerRef cgpassmgr;
-#endif
 };
 #else
 struct lp_passmgr;
@@ -82,16 +69,13 @@ lp_passmgr_create(LLVMModuleRef module, struct lp_passmgr **mgr_p)
       return false;
    }
 
-#if HAVE_CORO == 1
    mgr->cgpassmgr = LLVMCreatePassManager();
-#endif
 
    /*
    * TODO: some per module pass manager with IPO passes might be helpful -
    * the generated texture functions may benefit from inlining if they are
    * simple, or constant propagation into them, etc.
    */
-#if HAVE_CORO == 1
 #if LLVM_VERSION_MAJOR <= 8 && (DETECT_ARCH_AARCH64 || DETECT_ARCH_ARM || DETECT_ARCH_S390 || DETECT_ARCH_MIPS64)
    LLVMAddArgumentPromotionPass(mgr->cgpassmgr);
    LLVMAddFunctionAttrsPass(mgr->cgpassmgr);
@@ -99,7 +83,6 @@ lp_passmgr_create(LLVMModuleRef module, struct lp_passmgr **mgr_p)
    LLVMAddCoroEarlyPass(mgr->cgpassmgr);
    LLVMAddCoroSplitPass(mgr->cgpassmgr);
    LLVMAddCoroElidePass(mgr->cgpassmgr);
-#endif
 
    if ((gallivm_perf & GALLIVM_PERF_NO_OPT) == 0) {
       /*
@@ -138,9 +121,7 @@ lp_passmgr_create(LLVMModuleRef module, struct lp_passmgr **mgr_p)
       */
      LLVMAddPromoteMemoryToRegisterPass(mgr->passmgr);
   }
-#if HAVE_CORO == 1
   LLVMAddCoroCleanupPass(mgr->passmgr);
-#endif
 #endif
   *mgr_p = mgr;
   return true;
@@ -182,9 +163,7 @@ lp_passmgr_run(struct lp_passmgr *mgr,
    LLVMRunPasses(module, passes, tm, opts);
    LLVMDisposePassBuilderOptions(opts);
 #else
-#if HAVE_CORO == 1
    LLVMRunPassManager(mgr->cgpassmgr, module);
-#endif
    /* Run optimization passes */
    LLVMInitializeFunctionPassManager(mgr->passmgr);
    LLVMValueRef func;
@@ -206,6 +185,7 @@
    }
    LLVMFinalizeFunctionPassManager(mgr->passmgr);
 #endif
+
    if (gallivm_debug & GALLIVM_DEBUG_PERF) {
       int64_t time_end = os_time_get();
       int time_msec = (int)((time_end - time_begin) / 1000);
@@ -224,12 +204,10 @@ lp_passmgr_dispose(struct lp_passmgr *mgr)
       mgr->passmgr = NULL;
    }
 
-#if HAVE_CORO == 1
    if (mgr->cgpassmgr) {
       LLVMDisposePassManager(mgr->cgpassmgr);
       mgr->cgpassmgr = NULL;
    }
-#endif
    FREE(mgr);
 #endif
 }
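After this cleanup the only remaining split is USE_NEW_PASS: on LLVM >= 15 the new pass manager's default pipelines take care of coroutine lowering, while on older LLVM the legacy call-graph pass manager now always carries the coro passes. A condensed sketch of the two run paths; this is hypothetical, mirroring the code above, and the actual pass string gallivm uses may differ:

    #if USE_NEW_PASS
       /* new pass manager: coroutine lowering is part of the default pipeline */
       LLVMPassBuilderOptionsRef opts = LLVMCreatePassBuilderOptions();
       LLVMRunPasses(module, "default<O2>", tm, opts);
       LLVMDisposePassBuilderOptions(opts);
    #else
       /* legacy: cgpassmgr now unconditionally runs the coroutine passes */
       LLVMRunPassManager(mgr->cgpassmgr, module);
    #endif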


@@ -135,10 +135,6 @@ llvmpipe_init_shader_caps(struct pipe_screen *screen)
       break;
    case PIPE_SHADER_TESS_CTRL:
    case PIPE_SHADER_TESS_EVAL:
-      /* Tessellation shader needs llvm coroutines support */
-      if (!GALLIVM_COROUTINES)
-         continue;
-      FALLTHROUGH;
    case PIPE_SHADER_VERTEX:
    case PIPE_SHADER_GEOMETRY:
       draw_init_shader_caps(caps);
@@ -273,7 +269,7 @@ llvmpipe_init_screen_caps(struct pipe_screen *screen)
    caps->vertex_color_clamped = true;
    caps->glsl_feature_level_compatibility =
       caps->glsl_feature_level = 450;
-   caps->compute = GALLIVM_COROUTINES;
+   caps->compute = true;
    caps->user_vertex_buffers = true;
    caps->tgsi_texcoord = true;
    caps->draw_indirect = true;


@@ -481,7 +481,7 @@ test_unary(unsigned verbose, FILE *fp, const struct unary_test_t *test, unsigned
    if (test->ref == &nearbyintf && length == 2 &&
        !util_get_cpu_caps()->has_neon &&
        DETECT_ARCH_S390 == false &&
-       !(util_get_cpu_caps()->has_sse4_1 && LLVM_VERSION_MAJOR >= 8) &&
+       !util_get_cpu_caps()->has_sse4_1 &&
        ref != roundf(testval)) {
       /* FIXME: The generic (non SSE) path in lp_build_iround, which is
       * always taken for length==2 regardless of native round support,