From a9e1ecf6f014d8d0a71bfbbf805458fdcd9015f6 Mon Sep 17 00:00:00 2001 From: Jiale Zhao Date: Thu, 1 Aug 2024 15:29:26 +0800 Subject: [PATCH] llvmpipe: add loongarch util_get_cpu_caps function When using an LA464 (3A5000 PC) with LLVM 18 and a kernel that does not support lsx/lasx, an illegal instruction error occurs. When calling getHostCPUName(), LA464 returns LA464 and LA664 returns generic. The LA464 name enables all features (+f,+d,+lsx,+lasx,lvz) by default and makes LLVM generate vector instructions, while generic enables none. Hence we use util_get_cpu_caps to determine whether the kernel supports lsx/lasx, and set lsx and lasx according to util_get_cpu_caps. Only LLVM 18 on loongarch supports lsx and lasx completely. LLVM 16 does not support them and LLVM 17's LSX support is incomplete. Signed-off-by: Jiale Zhao Part-of: --- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 13 +++++--- src/gallium/drivers/llvmpipe/lp_screen.c | 6 ++-- src/util/u_cpu_detect.c | 31 +++++++++++++++++++ src/util/u_cpu_detect.h | 2 ++ 4 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index f25db6a763d..e5f4dfc829f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -417,8 +417,6 @@ lp_build_fill_mattrs(std::vector &MAttrs) #if DETECT_ARCH_LOONGARCH64 == 1 /* - * TODO: Implement util_get_cpu_caps() - * * No FPU-less LoongArch64 systems are ever shipped yet, and LP64D is * the default ABI, so FPU is enabled here. * @@ -427,9 +425,16 @@ lp_build_fill_mattrs(std::vector &MAttrs) * https://github.com/loongson/la-softdev-convention/releases/download/v0.1/la-softdev-convention.pdf */ MAttrs = {"+f","+d"}; -#if LLVM_VERSION_MAJOR == 17 - /* LLVM 17's LSX support is incomplete, so explicitly mask it */ +#if LLVM_VERSION_MAJOR >= 18 + MAttrs.push_back(util_get_cpu_caps()->has_lsx ? "+lsx" : "-lsx"); + MAttrs.push_back(util_get_cpu_caps()->has_lasx ? 
"+lasx" : "-lasx"); +#else + /* + * LLVM 17's LSX support is incomplete, and LLVM 16 doesn't support + * LSX and LASX. So explicitly mask them. + */ MAttrs.push_back("-lsx"); + MAttrs.push_back("-lasx"); #endif #endif } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 9a745b3ed50..ffa7f5608bc 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -997,11 +997,11 @@ update_cache_sha1_cpu(struct mesa_sha1 *ctx) const struct util_cpu_caps_t *cpu_caps = util_get_cpu_caps(); /* * Don't need the cpu cache affinity stuff. The rest - * is contained in first 5 dwords. + * is contained in first 6 dwords. */ STATIC_ASSERT(offsetof(struct util_cpu_caps_t, num_L3_caches) - == 5 * sizeof(uint32_t)); - _mesa_sha1_update(ctx, cpu_caps, 5 * sizeof(uint32_t)); + == 6 * sizeof(uint32_t)); + _mesa_sha1_update(ctx, cpu_caps, 6 * sizeof(uint32_t)); } diff --git a/src/util/u_cpu_detect.c b/src/util/u_cpu_detect.c index e77bdbb1b6a..eeea012e1cc 100644 --- a/src/util/u_cpu_detect.c +++ b/src/util/u_cpu_detect.c @@ -470,6 +470,31 @@ check_os_mips64_support(void) } #endif /* DETECT_ARCH_MIPS64 */ +#if DETECT_ARCH_LOONGARCH64 +static void +check_os_loongarch64_support(void) +{ /* Sets has_lsx/has_lasx from the AT_HWCAP entry of /proc/self/auxv; both flags are left untouched if the file cannot be opened, no AT_HWCAP entry is found, or this is not a Linux build. */ +#if DETECT_OS_LINUX + Elf64_auxv_t aux; + int fd; + + fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC); + if (fd >= 0) { + while (read(fd, &aux, sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) { /* one auxv record per read; stops at EOF, error or a short read */ + if (aux.a_type == AT_HWCAP) { + uint64_t hwcap = aux.a_un.a_val; + + util_cpu_caps.has_lsx = (hwcap >> 4) & 1; /* AT_HWCAP bit 4 -> LSX (presumably HWCAP_LOONGARCH_LSX; verify against the kernel's asm/hwcap.h) */ + util_cpu_caps.has_lasx = (hwcap >> 5) & 1; /* AT_HWCAP bit 5 -> LASX (presumably HWCAP_LOONGARCH_LASX; verify against the kernel's asm/hwcap.h) */ + break; /* AT_HWCAP is the only entry consulted */ + } + } + close (fd); + } +#endif /* DETECT_OS_LINUX */ +} +#endif /* DETECT_ARCH_LOONGARCH64 */ + static void get_cpu_topology(void) @@ -943,6 +968,10 @@ _util_cpu_detect_once(void) check_os_mips64_support(); #endif /* DETECT_ARCH_MIPS64 */ +#if DETECT_ARCH_LOONGARCH64 + check_os_loongarch64_support(); +#endif /* DETECT_ARCH_LOONGARCH64 */ + #if DETECT_ARCH_S390 
util_cpu_caps.family = CPU_S390X; #endif @@ -980,6 +1009,8 @@ _util_cpu_detect_once(void) printf("util_cpu_caps.has_neon = %u\n", util_cpu_caps.has_neon); printf("util_cpu_caps.has_msa = %u\n", util_cpu_caps.has_msa); printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz); + printf("util_cpu_caps.has_lsx = %u\n", util_cpu_caps.has_lsx); + printf("util_cpu_caps.has_lasx = %u\n", util_cpu_caps.has_lasx); printf("util_cpu_caps.has_avx512f = %u\n", util_cpu_caps.has_avx512f); printf("util_cpu_caps.has_avx512dq = %u\n", util_cpu_caps.has_avx512dq); printf("util_cpu_caps.has_avx512ifma = %u\n", util_cpu_caps.has_avx512ifma); diff --git a/src/util/u_cpu_detect.h b/src/util/u_cpu_detect.h index 6ae0c5dbe2d..13868482b76 100644 --- a/src/util/u_cpu_detect.h +++ b/src/util/u_cpu_detect.h @@ -109,6 +109,8 @@ struct util_cpu_caps_t { unsigned has_daz:1; unsigned has_neon:1; unsigned has_msa:1; + unsigned has_lsx:1; + unsigned has_lasx:1; unsigned has_avx512f:1; unsigned has_avx512dq:1;