util: Add accessor for util_cpu_caps

In release builds, there should be no change, but in debug builds the
assert will help us catch undefined behavior resulting from using
util_cpu_caps before it is initialized.

With a fix for u_half_test on MSVC from Jesse Natalie squashed in.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9266>
Rob Clark 2021-02-25 10:04:50 -08:00 committed by Marge Bot
parent 9fb9019beb
commit a9618e7c42
38 changed files with 187 additions and 162 deletions
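
For reference, the accessor introduced here is a thin inline wrapper over the existing global. The following is a minimal sketch reconstructed from the hunks below, not the verbatim header; in particular, the exact assert condition is an assumption (util_cpu_detect() always fills in at least nr_cpus, so a zero value means the caps were read before detection ran):

/* u_cpu_detect.h -- sketch of the new accessor (assumed details) */
extern struct util_cpu_caps_t util_cpu_caps;

static inline const struct util_cpu_caps_t *
util_get_cpu_caps(void)
{
   /* Debug builds: fires when something reads the caps before
    * util_cpu_detect() has initialized them; compiles away in
    * release builds. */
   assert(util_cpu_caps.nr_cpus >= 1);
   return &util_cpu_caps;
}

The const return type matches the rtasm and u_half_test hunks below: code that deliberately mutates the caps (debug-only CPU simulation) now has to cast the const away or keep using the global through an extern declaration.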

View file

@@ -480,8 +480,8 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
 util_cpu_detect();
 info->smart_access_memory = info->all_vram_visible &&
 info->chip_class >= GFX10_3 &&
-util_cpu_caps.family >= CPU_AMD_ZEN3 &&
-util_cpu_caps.family < CPU_AMD_LAST;
+util_get_cpu_caps()->family >= CPU_AMD_ZEN3 &&
+util_get_cpu_caps()->family < CPU_AMD_LAST;
 /* Set chip identification. */
 info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */

View file

@@ -34,6 +34,8 @@
 #include "aco_ir.h"
 #include "framework.h"
+#include "util/u_cpu_detect.h"
 static const char *help_message =
 "Usage: %s [-h] [-l --list] [--no-check] [TEST [TEST ...]]\n"
 "\n"
@@ -241,6 +243,8 @@ int main(int argc, char **argv)
 return 99;
 }
+util_cpu_detect();
 if (do_list) {
 for (auto test : tests)
 printf("%s\n", test.first.c_str());

View file

@@ -398,6 +398,8 @@ standalone_compile_shader(const struct standalone_options *_options,
 int status = EXIT_SUCCESS;
 bool glsl_es = false;
+util_cpu_detect();
 options = _options;
 switch (options->glsl_version) {

View file

@@ -36,6 +36,7 @@ protected:
 const_value_negative_equal_test()
 {
 glsl_type_singleton_init_or_ref();
+util_cpu_detect();
 memset(c1, 0, sizeof(c1));
 memset(c2, 0, sizeof(c2));
@@ -55,6 +56,7 @@ protected:
 alu_srcs_negative_equal_test()
 {
 glsl_type_singleton_init_or_ref();
+util_cpu_detect();
 static const nir_shader_compiler_options options = { };
 bld = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, &options,

View file

@@ -679,6 +679,8 @@ isa_decode(void *bin, int sz, FILE *out, const struct isa_decode_options *option
 if (!options)
 options = &default_options;
+util_cpu_detect(); /* needed for _mesa_half_to_float() */
 state = rzalloc_size(NULL, sizeof(*state));
 state->options = options;
 state->num_instr = sz / 8;

View file

@@ -104,13 +104,13 @@ lp_build_min_simple(struct lp_build_context *bld,
 /* TODO: optimize the constant case */
-if (type.floating && util_cpu_caps.has_sse) {
+if (type.floating && util_get_cpu_caps()->has_sse) {
 if (type.width == 32) {
 if (type.length == 1) {
 intrinsic = "llvm.x86.sse.min.ss";
 intr_size = 128;
 }
-else if (type.length <= 4 || !util_cpu_caps.has_avx) {
+else if (type.length <= 4 || !util_get_cpu_caps()->has_avx) {
 intrinsic = "llvm.x86.sse.min.ps";
 intr_size = 128;
 }
@@ -119,12 +119,12 @@ lp_build_min_simple(struct lp_build_context *bld,
 intr_size = 256;
 }
 }
-if (type.width == 64 && util_cpu_caps.has_sse2) {
+if (type.width == 64 && util_get_cpu_caps()->has_sse2) {
 if (type.length == 1) {
 intrinsic = "llvm.x86.sse2.min.sd";
 intr_size = 128;
 }
-else if (type.length == 2 || !util_cpu_caps.has_avx) {
+else if (type.length == 2 || !util_get_cpu_caps()->has_avx) {
 intrinsic = "llvm.x86.sse2.min.pd";
 intr_size = 128;
 }
@@ -134,7 +134,7 @@ lp_build_min_simple(struct lp_build_context *bld,
 }
 }
 }
-else if (type.floating && util_cpu_caps.has_altivec) {
+else if (type.floating && util_get_cpu_caps()->has_altivec) {
 if (nan_behavior == GALLIVM_NAN_RETURN_NAN ||
 nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
 debug_printf("%s: altivec doesn't support nan return nan behavior\n",
@@ -144,7 +144,7 @@ lp_build_min_simple(struct lp_build_context *bld,
 intrinsic = "llvm.ppc.altivec.vminfp";
 intr_size = 128;
 }
-} else if (util_cpu_caps.has_altivec) {
+} else if (util_get_cpu_caps()->has_altivec) {
 intr_size = 128;
 if (type.width == 8) {
 if (!type.sign) {
@@ -174,7 +174,7 @@ lp_build_min_simple(struct lp_build_context *bld,
 * The sse intrinsics return the second operator in case of nan by
 * default so we need to special code to handle those.
 */
-if (util_cpu_caps.has_sse && type.floating &&
+if (util_get_cpu_caps()->has_sse && type.floating &&
 nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
 nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN &&
 nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
@@ -274,13 +274,13 @@ lp_build_max_simple(struct lp_build_context *bld,
 /* TODO: optimize the constant case */
-if (type.floating && util_cpu_caps.has_sse) {
+if (type.floating && util_get_cpu_caps()->has_sse) {
 if (type.width == 32) {
 if (type.length == 1) {
 intrinsic = "llvm.x86.sse.max.ss";
 intr_size = 128;
 }
-else if (type.length <= 4 || !util_cpu_caps.has_avx) {
+else if (type.length <= 4 || !util_get_cpu_caps()->has_avx) {
 intrinsic = "llvm.x86.sse.max.ps";
 intr_size = 128;
 }
@@ -289,12 +289,12 @@ lp_build_max_simple(struct lp_build_context *bld,
 intr_size = 256;
 }
 }
-if (type.width == 64 && util_cpu_caps.has_sse2) {
+if (type.width == 64 && util_get_cpu_caps()->has_sse2) {
 if (type.length == 1) {
 intrinsic = "llvm.x86.sse2.max.sd";
 intr_size = 128;
 }
-else if (type.length == 2 || !util_cpu_caps.has_avx) {
+else if (type.length == 2 || !util_get_cpu_caps()->has_avx) {
 intrinsic = "llvm.x86.sse2.max.pd";
 intr_size = 128;
 }
@@ -304,7 +304,7 @@ lp_build_max_simple(struct lp_build_context *bld,
 }
 }
 }
-else if (type.floating && util_cpu_caps.has_altivec) {
+else if (type.floating && util_get_cpu_caps()->has_altivec) {
 if (nan_behavior == GALLIVM_NAN_RETURN_NAN ||
 nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
 debug_printf("%s: altivec doesn't support nan return nan behavior\n",
@@ -314,7 +314,7 @@ lp_build_max_simple(struct lp_build_context *bld,
 intrinsic = "llvm.ppc.altivec.vmaxfp";
 intr_size = 128;
 }
-} else if (util_cpu_caps.has_altivec) {
+} else if (util_get_cpu_caps()->has_altivec) {
 intr_size = 128;
 if (type.width == 8) {
 if (!type.sign) {
@@ -338,7 +338,7 @@ lp_build_max_simple(struct lp_build_context *bld,
 }
 if (intrinsic) {
-if (util_cpu_caps.has_sse && type.floating &&
+if (util_get_cpu_caps()->has_sse && type.floating &&
 nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
 nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN &&
 nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
@@ -472,12 +472,12 @@ lp_build_add(struct lp_build_context *bld,
 return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b);
 }
 if (type.width * type.length == 128) {
-if (util_cpu_caps.has_sse2) {
+if (util_get_cpu_caps()->has_sse2) {
 if (type.width == 8)
 intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
 if (type.width == 16)
 intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
-} else if (util_cpu_caps.has_altivec) {
+} else if (util_get_cpu_caps()->has_altivec) {
 if (type.width == 8)
 intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs";
 if (type.width == 16)
@@ -485,7 +485,7 @@ lp_build_add(struct lp_build_context *bld,
 }
 }
 if (type.width * type.length == 256) {
-if (util_cpu_caps.has_avx2) {
+if (util_get_cpu_caps()->has_avx2) {
 if (type.width == 8)
 intrinsic = type.sign ? "llvm.x86.avx2.padds.b" : "llvm.x86.avx2.paddus.b";
 if (type.width == 16)
@@ -713,11 +713,11 @@ lp_build_hadd_partial4(struct lp_build_context *bld,
 tmp[2] = num_vecs > 2 ? vectors[2] : vectors[0];
 tmp[3] = num_vecs > 3 ? vectors[3] : vectors[0];
-if (util_cpu_caps.has_sse3 && bld->type.width == 32 &&
+if (util_get_cpu_caps()->has_sse3 && bld->type.width == 32 &&
 bld->type.length == 4) {
 intrinsic = "llvm.x86.sse3.hadd.ps";
 }
-else if (util_cpu_caps.has_avx && bld->type.width == 32 &&
+else if (util_get_cpu_caps()->has_avx && bld->type.width == 32 &&
 bld->type.length == 8) {
 intrinsic = "llvm.x86.avx.hadd.ps.256";
 }
@@ -796,12 +796,12 @@ lp_build_sub(struct lp_build_context *bld,
 return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b);
 }
 if (type.width * type.length == 128) {
-if (util_cpu_caps.has_sse2) {
+if (util_get_cpu_caps()->has_sse2) {
 if (type.width == 8)
 intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
 if (type.width == 16)
 intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
-} else if (util_cpu_caps.has_altivec) {
+} else if (util_get_cpu_caps()->has_altivec) {
 if (type.width == 8)
 intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs";
 if (type.width == 16)
@@ -809,7 +809,7 @@ lp_build_sub(struct lp_build_context *bld,
 }
 }
 if (type.width * type.length == 256) {
-if (util_cpu_caps.has_avx2) {
+if (util_get_cpu_caps()->has_avx2) {
 if (type.width == 8)
 intrinsic = type.sign ? "llvm.x86.avx2.psubs.b" : "llvm.x86.avx2.psubus.b";
 if (type.width == 16)
@@ -1078,8 +1078,8 @@ lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
 */
 if (LLVM_VERSION_MAJOR < 7 &&
 (bld->type.length == 4 || bld->type.length == 8) &&
-((util_cpu_caps.has_sse2 && (bld->type.sign == 0)) ||
-util_cpu_caps.has_sse4_1)) {
+((util_get_cpu_caps()->has_sse2 && (bld->type.sign == 0)) ||
+util_get_cpu_caps()->has_sse4_1)) {
 const char *intrinsic = NULL;
 LLVMValueRef aeven, aodd, beven, bodd, muleven, mulodd;
 LLVMValueRef shuf[LP_MAX_VECTOR_WIDTH / 32], shuf_vec;
@@ -1096,7 +1096,7 @@ lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
 aodd = LLVMBuildShuffleVector(builder, aeven, bld->undef, shuf_vec, "");
 bodd = LLVMBuildShuffleVector(builder, beven, bld->undef, shuf_vec, "");
-if (util_cpu_caps.has_avx2 && bld->type.length == 8) {
+if (util_get_cpu_caps()->has_avx2 && bld->type.length == 8) {
 if (bld->type.sign) {
 intrinsic = "llvm.x86.avx2.pmul.dq";
 } else {
@@ -1331,8 +1331,8 @@ lp_build_div(struct lp_build_context *bld,
 /* fast rcp is disabled (just uses div), so makes no sense to try that */
 if(FALSE &&
-((util_cpu_caps.has_sse && type.width == 32 && type.length == 4) ||
-(util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) &&
+((util_get_cpu_caps()->has_sse && type.width == 32 && type.length == 4) ||
+(util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8)) &&
 type.floating)
 return lp_build_mul(bld, a, lp_build_rcp(bld, b));
@@ -1745,7 +1745,7 @@ lp_build_abs(struct lp_build_context *bld,
 return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
 }
-if(type.width*type.length == 128 && util_cpu_caps.has_ssse3 && LLVM_VERSION_MAJOR < 6) {
+if(type.width*type.length == 128 && util_get_cpu_caps()->has_ssse3 && LLVM_VERSION_MAJOR < 6) {
 switch(type.width) {
 case 8:
 return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
@@ -1755,7 +1755,7 @@ lp_build_abs(struct lp_build_context *bld,
 return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
 }
 }
-else if (type.width*type.length == 256 && util_cpu_caps.has_avx2 && LLVM_VERSION_MAJOR < 6) {
+else if (type.width*type.length == 256 && util_get_cpu_caps()->has_avx2 && LLVM_VERSION_MAJOR < 6) {
 switch(type.width) {
 case 8:
 return lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.b", vec_type, a);
@@ -1897,15 +1897,15 @@ lp_build_int_to_float(struct lp_build_context *bld,
 static boolean
 arch_rounding_available(const struct lp_type type)
 {
-if ((util_cpu_caps.has_sse4_1 &&
+if ((util_get_cpu_caps()->has_sse4_1 &&
 (type.length == 1 || type.width*type.length == 128)) ||
-(util_cpu_caps.has_avx && type.width*type.length == 256) ||
-(util_cpu_caps.has_avx512f && type.width*type.length == 512))
+(util_get_cpu_caps()->has_avx && type.width*type.length == 256) ||
+(util_get_cpu_caps()->has_avx512f && type.width*type.length == 512))
 return TRUE;
-else if ((util_cpu_caps.has_altivec &&
+else if ((util_get_cpu_caps()->has_altivec &&
 (type.width == 32 && type.length == 4)))
 return TRUE;
-else if (util_cpu_caps.has_neon)
+else if (util_get_cpu_caps()->has_neon)
 return TRUE;
 return FALSE;
@@ -1935,7 +1935,7 @@ lp_build_iround_nearest_sse2(struct lp_build_context *bld,
 assert(type.width == 32);
 assert(lp_check_value(type, a));
-assert(util_cpu_caps.has_sse2);
+assert(util_get_cpu_caps()->has_sse2);
 /* This is relying on MXCSR rounding mode, which should always be nearest. */
 if (type.length == 1) {
@@ -1961,7 +1961,7 @@ lp_build_iround_nearest_sse2(struct lp_build_context *bld,
 }
 else {
 assert(type.width*type.length == 256);
-assert(util_cpu_caps.has_avx);
+assert(util_get_cpu_caps()->has_avx);
 intrinsic = "llvm.x86.avx.cvt.ps2dq.256";
 }
@@ -1987,7 +1987,7 @@ lp_build_round_altivec(struct lp_build_context *bld,
 assert(type.floating);
 assert(lp_check_value(type, a));
-assert(util_cpu_caps.has_altivec);
+assert(util_get_cpu_caps()->has_altivec);
 (void)type;
@@ -2014,7 +2014,7 @@ lp_build_round_arch(struct lp_build_context *bld,
 LLVMValueRef a,
 enum lp_build_round_mode mode)
 {
-if (util_cpu_caps.has_sse4_1 || util_cpu_caps.has_neon) {
+if (util_get_cpu_caps()->has_sse4_1 || util_get_cpu_caps()->has_neon) {
 LLVMBuilderRef builder = bld->gallivm->builder;
 const struct lp_type type = bld->type;
 const char *intrinsic_root;
@@ -2044,7 +2044,7 @@ lp_build_round_arch(struct lp_build_context *bld,
 lp_format_intrinsic(intrinsic, sizeof intrinsic, intrinsic_root, bld->vec_type);
 return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
 }
-else /* (util_cpu_caps.has_altivec) */
+else /* (util_get_cpu_caps()->has_altivec) */
 return lp_build_round_altivec(bld, a, mode);
 }
@@ -2379,9 +2379,9 @@ lp_build_iround(struct lp_build_context *bld,
 assert(lp_check_value(type, a));
-if ((util_cpu_caps.has_sse2 &&
+if ((util_get_cpu_caps()->has_sse2 &&
 ((type.width == 32) && (type.length == 1 || type.length == 4))) ||
-(util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) {
+(util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8)) {
 return lp_build_iround_nearest_sse2(bld, a);
 }
 if (arch_rounding_available(type)) {
@@ -2666,8 +2666,8 @@ lp_build_rcp(struct lp_build_context *bld,
 * particular uses that require less workarounds.
 */
-if (FALSE && ((util_cpu_caps.has_sse && type.width == 32 && type.length == 4) ||
-(util_cpu_caps.has_avx && type.width == 32 && type.length == 8))){
+if (FALSE && ((util_get_cpu_caps()->has_sse && type.width == 32 && type.length == 4) ||
+(util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8))){
 const unsigned num_iterations = 0;
 LLVMValueRef res;
 unsigned i;
@@ -2786,8 +2786,8 @@ lp_build_fast_rsqrt_available(struct lp_type type)
 {
 assert(type.floating);
-if ((util_cpu_caps.has_sse && type.width == 32 && type.length == 4) ||
-(util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) {
+if ((util_get_cpu_caps()->has_sse && type.width == 32 && type.length == 4) ||
+(util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8)) {
 return true;
 }
 return false;
@@ -3696,7 +3696,7 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
 LLVMValueRef
 lp_build_fpstate_get(struct gallivm_state *gallivm)
 {
-if (util_cpu_caps.has_sse) {
+if (util_get_cpu_caps()->has_sse) {
 LLVMBuilderRef builder = gallivm->builder;
 LLVMValueRef mxcsr_ptr = lp_build_alloca(
 gallivm,
@@ -3717,7 +3717,7 @@ void
 lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
 boolean zero)
 {
-if (util_cpu_caps.has_sse) {
+if (util_get_cpu_caps()->has_sse) {
 /* turn on DAZ (64) | FTZ (32768) = 32832 if available */
 int daz_ftz = _MM_FLUSH_ZERO_MASK;
@@ -3726,7 +3726,7 @@ lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
 LLVMValueRef mxcsr =
 LLVMBuildLoad(builder, mxcsr_ptr, "mxcsr");
-if (util_cpu_caps.has_daz) {
+if (util_get_cpu_caps()->has_daz) {
 /* Enable denormals are zero mode */
 daz_ftz |= _MM_DENORMALS_ZERO_MASK;
 }
@@ -3747,7 +3747,7 @@ void
 lp_build_fpstate_set(struct gallivm_state *gallivm,
 LLVMValueRef mxcsr_ptr)
 {
-if (util_cpu_caps.has_sse) {
+if (util_get_cpu_caps()->has_sse) {
 LLVMBuilderRef builder = gallivm->builder;
 mxcsr_ptr = LLVMBuildPointerCast(builder, mxcsr_ptr,
 LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");

View file

@@ -110,7 +110,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm,
 LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type);
 LLVMValueRef h;
-if (util_cpu_caps.has_f16c &&
+if (util_get_cpu_caps()->has_f16c &&
 (src_length == 4 || src_length == 8)) {
 if (LLVM_VERSION_MAJOR < 11) {
 const char *intrinsic = NULL;
@@ -176,7 +176,7 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
 * useless.
 */
-if (util_cpu_caps.has_f16c &&
+if (util_get_cpu_caps()->has_f16c &&
 (length == 4 || length == 8)) {
 struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
 unsigned mode = 3; /* same as LP_BUILD_ROUND_TRUNCATE */
@@ -498,7 +498,7 @@ int lp_build_conv_auto(struct gallivm_state *gallivm,
 /* Special case 4x4x32 --> 1x16x8 */
 if (src_type.length == 4 &&
-(util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec))
+(util_get_cpu_caps()->has_sse2 || util_get_cpu_caps()->has_altivec))
 {
 num_dsts = (num_srcs + 3) / 4;
 dst_type->length = num_srcs * 4 >= 16 ? 16 : num_srcs * 4;
@@ -509,7 +509,7 @@ int lp_build_conv_auto(struct gallivm_state *gallivm,
 /* Special case 2x8x32 --> 1x16x8 */
 if (src_type.length == 8 &&
-util_cpu_caps.has_avx)
+util_get_cpu_caps()->has_avx)
 {
 num_dsts = (num_srcs + 1) / 2;
 dst_type->length = num_srcs * 8 >= 16 ? 16 : num_srcs * 8;
@@ -606,7 +606,7 @@ lp_build_conv(struct gallivm_state *gallivm,
 ((dst_type.length == 16 && 4 * num_dsts == num_srcs) ||
 (num_dsts == 1 && dst_type.length * num_srcs == 16 && num_srcs != 3)) &&
-(util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec))
+(util_get_cpu_caps()->has_sse2 || util_get_cpu_caps()->has_altivec))
 {
 struct lp_build_context bld;
 struct lp_type int16_type, int32_type;
@@ -719,7 +719,7 @@ lp_build_conv(struct gallivm_state *gallivm,
 ((dst_type.length == 16 && 2 * num_dsts == num_srcs) ||
 (num_dsts == 1 && dst_type.length * num_srcs == 8)) &&
-util_cpu_caps.has_avx) {
+util_get_cpu_caps()->has_avx) {
 struct lp_build_context bld;
 struct lp_type int16_type, int32_type;

View file

@@ -642,8 +642,8 @@ s3tc_dxt1_full_to_rgba_aos(struct gallivm_state *gallivm,
 * XXX with sse2 and 16x8 vectors, should use pavgb even when n == 1.
 * Much cheaper (but we don't care that much if n == 1).
 */
-if ((util_cpu_caps.has_sse2 && n == 4) ||
-(util_cpu_caps.has_avx2 && n == 8)) {
+if ((util_get_cpu_caps()->has_sse2 && n == 4) ||
+(util_get_cpu_caps()->has_avx2 && n == 8)) {
 color2_2 = lp_build_pavgb(&bld8, colors0, colors1);
 color2_2 = LLVMBuildBitCast(builder, color2_2, bld32.vec_type, "");
 }
@@ -1350,7 +1350,7 @@ s3tc_decode_block_dxt1(struct gallivm_state *gallivm,
 if (is_dxt1_variant) {
 LLVMValueRef color23_2, color2_2;
-if (util_cpu_caps.has_sse2) {
+if (util_get_cpu_caps()->has_sse2) {
 LLVMValueRef intrargs[2];
 intrargs[0] = LLVMBuildBitCast(builder, color01, bld8.vec_type, "");
 /* same interleave as for lerp23 - correct result in 2nd element */
@@ -1389,7 +1389,7 @@ s3tc_decode_block_dxt1(struct gallivm_state *gallivm,
 color23 = lp_build_select(&bld32, sel_mask, color23, color23_2);
 }
-if (util_cpu_caps.has_ssse3) {
+if (util_get_cpu_caps()->has_ssse3) {
 /*
 * Use pshufb as mini-lut. (Only doable with intrinsics as the
 * final shuffles are non-constant. pshufb is awesome!)
@@ -1689,7 +1689,7 @@ s3tc_decode_block_dxt5(struct gallivm_state *gallivm,
 type16.sign = FALSE;
 sel_mask = LLVMBuildBitCast(builder, sel_mask, bld8.vec_type, "");
-if (!util_cpu_caps.has_ssse3) {
+if (!util_get_cpu_caps()->has_ssse3) {
 LLVMValueRef acodeg, mask1, acode0, acode1;
 /* extraction of the 3 bit values into something more useful is HARD */

View file

@@ -90,7 +90,7 @@ uyvy_to_yuv_soa(struct gallivm_state *gallivm,
 * per element. Didn't measure performance but cuts shader size
 * by quite a bit (less difference if cpu has no sse4.1 support).
 */
-if (util_cpu_caps.has_sse2 && n > 1) {
+if (util_get_cpu_caps()->has_sse2 && n > 1) {
 LLVMValueRef sel, tmp, tmp2;
 struct lp_build_context bld32;
@@ -174,7 +174,7 @@ yuyv_to_yuv_soa(struct gallivm_state *gallivm,
 * per element. Didn't measure performance but cuts shader size
 * by quite a bit (less difference if cpu has no sse4.1 support).
 */
-if (util_cpu_caps.has_sse2 && n > 1) {
+if (util_get_cpu_caps()->has_sse2 && n > 1) {
 LLVMValueRef sel, tmp;
 struct lp_build_context bld32;

View file

@@ -488,7 +488,7 @@ lp_build_gather(struct gallivm_state *gallivm,
 * 32bit/64bit fetches you're doing it wrong (this is gather, not
 * conversion) and it would be awkward for floats.
 */
-} else if (util_cpu_caps.has_avx2 && !need_expansion &&
+} else if (util_get_cpu_caps()->has_avx2 && !need_expansion &&
 src_width == 32 && (length == 4 || length == 8)) {
 return lp_build_gather_avx2(gallivm, length, src_width, dst_type,
 base_ptr, offsets);
@@ -500,7 +500,7 @@ lp_build_gather(struct gallivm_state *gallivm,
 * (In general, should be more of a win if the fetch is 256bit wide -
 * this is true for the 32bit case above too.)
 */
-} else if (0 && util_cpu_caps.has_avx2 && !need_expansion &&
+} else if (0 && util_get_cpu_caps()->has_avx2 && !need_expansion &&
 src_width == 64 && (length == 2 || length == 4)) {
 return lp_build_gather_avx2(gallivm, length, src_width, dst_type,
 base_ptr, offsets);

View file

@@ -433,6 +433,7 @@ lp_build_init(void)
 /* For simulating less capable machines */
 #ifdef DEBUG
 if (debug_get_bool_option("LP_FORCE_SSE2", FALSE)) {
+extern struct util_cpu_caps_t util_cpu_caps;
 assert(util_cpu_caps.has_sse2);
 util_cpu_caps.has_sse3 = 0;
 util_cpu_caps.has_ssse3 = 0;
@@ -445,7 +446,7 @@ lp_build_init(void)
 }
 #endif
-if (util_cpu_caps.has_avx2 || util_cpu_caps.has_avx) {
+if (util_get_cpu_caps()->has_avx2 || util_get_cpu_caps()->has_avx) {
 lp_native_vector_width = 256;
 } else {
 /* Leave it at 128, even when no SIMD extensions are available.
@@ -460,16 +461,16 @@ lp_build_init(void)
 #if LLVM_VERSION_MAJOR < 4
 if (lp_native_vector_width <= 128) {
 /* Hide AVX support, as often LLVM AVX intrinsics are only guarded by
-* "util_cpu_caps.has_avx" predicate, and lack the
+* "util_get_cpu_caps()->has_avx" predicate, and lack the
 * "lp_native_vector_width > 128" predicate. And also to ensure a more
 * consistent behavior, allowing one to test SSE2 on AVX machines.
 * XXX: should not play games with util_cpu_caps directly as it might
 * get used for other things outside llvm too.
 */
-util_cpu_caps.has_avx = 0;
-util_cpu_caps.has_avx2 = 0;
-util_cpu_caps.has_f16c = 0;
-util_cpu_caps.has_fma = 0;
+util_get_cpu_caps()->has_avx = 0;
+util_get_cpu_caps()->has_avx2 = 0;
+util_get_cpu_caps()->has_f16c = 0;
+util_get_cpu_caps()->has_fma = 0;
 }
 #endif
@@ -482,7 +483,7 @@ lp_build_init(void)
 * Right now denorms get explicitly disabled (but elsewhere) for x86,
 * whereas ppc64 explicitly enables them...
 */
-if (util_cpu_caps.has_altivec) {
+if (util_get_cpu_caps()->has_altivec) {
 unsigned short mask[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
 0xFFFF, 0xFFFF, 0xFFFE, 0xFFFF };
 __asm (

View file

@@ -196,7 +196,7 @@ lp_build_compare(struct gallivm_state *gallivm,
 if (!type.floating && !type.sign &&
 type.width * type.length == 128 &&
-util_cpu_caps.has_sse2 &&
+util_get_cpu_caps()->has_sse2 &&
 (func == PIPE_FUNC_LESS ||
 func == PIPE_FUNC_LEQUAL ||
 func == PIPE_FUNC_GREATER ||
@@ -348,11 +348,11 @@ lp_build_select(struct lp_build_context *bld,
 res = LLVMBuildSelect(builder, mask, a, b, "");
 }
-else if (((util_cpu_caps.has_sse4_1 &&
+else if (((util_get_cpu_caps()->has_sse4_1 &&
 type.width * type.length == 128) ||
-(util_cpu_caps.has_avx &&
+(util_get_cpu_caps()->has_avx &&
 type.width * type.length == 256 && type.width >= 32) ||
-(util_cpu_caps.has_avx2 &&
+(util_get_cpu_caps()->has_avx2 &&
 type.width * type.length == 256)) &&
 !LLVMIsConstant(a) &&
 !LLVMIsConstant(b) &&
@@ -379,7 +379,7 @@ lp_build_select(struct lp_build_context *bld,
 intrinsic = "llvm.x86.avx.blendv.ps.256";
 arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
 } else {
-assert(util_cpu_caps.has_avx2);
+assert(util_get_cpu_caps()->has_avx2);
 intrinsic = "llvm.x86.avx2.pblendvb";
 arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32);
 }

View file

@@ -400,22 +400,22 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
 * http://llvm.org/PR19429
 * http://llvm.org/PR16721
 */
-MAttrs.push_back(util_cpu_caps.has_sse ? "+sse" : "-sse" );
-MAttrs.push_back(util_cpu_caps.has_sse2 ? "+sse2" : "-sse2" );
-MAttrs.push_back(util_cpu_caps.has_sse3 ? "+sse3" : "-sse3" );
-MAttrs.push_back(util_cpu_caps.has_ssse3 ? "+ssse3" : "-ssse3" );
-MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse4.1" : "-sse4.1");
-MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse4.2" : "-sse4.2");
+MAttrs.push_back(util_get_cpu_caps()->has_sse ? "+sse" : "-sse" );
+MAttrs.push_back(util_get_cpu_caps()->has_sse2 ? "+sse2" : "-sse2" );
+MAttrs.push_back(util_get_cpu_caps()->has_sse3 ? "+sse3" : "-sse3" );
+MAttrs.push_back(util_get_cpu_caps()->has_ssse3 ? "+ssse3" : "-ssse3" );
+MAttrs.push_back(util_get_cpu_caps()->has_sse4_1 ? "+sse4.1" : "-sse4.1");
+MAttrs.push_back(util_get_cpu_caps()->has_sse4_2 ? "+sse4.2" : "-sse4.2");
 /*
 * AVX feature is not automatically detected from CPUID by the X86 target
 * yet, because the old (yet default) JIT engine is not capable of
 * emitting the opcodes. On newer llvm versions it is and at least some
 * versions (tested with 3.3) will emit avx opcodes without this anyway.
 */
-MAttrs.push_back(util_cpu_caps.has_avx ? "+avx" : "-avx");
-MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c");
-MAttrs.push_back(util_cpu_caps.has_fma ? "+fma" : "-fma");
-MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2");
+MAttrs.push_back(util_get_cpu_caps()->has_avx ? "+avx" : "-avx");
+MAttrs.push_back(util_get_cpu_caps()->has_f16c ? "+f16c" : "-f16c");
+MAttrs.push_back(util_get_cpu_caps()->has_fma ? "+fma" : "-fma");
+MAttrs.push_back(util_get_cpu_caps()->has_avx2 ? "+avx2" : "-avx2");
 /* disable avx512 and all subvariants */
 MAttrs.push_back("-avx512cd");
 MAttrs.push_back("-avx512er");
@@ -426,7 +426,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
 MAttrs.push_back("-avx512vl");
 #endif
 #if defined(PIPE_ARCH_ARM)
-if (!util_cpu_caps.has_neon) {
+if (!util_get_cpu_caps()->has_neon) {
 MAttrs.push_back("-neon");
 MAttrs.push_back("-crypto");
 MAttrs.push_back("-vfp2");
@@ -434,7 +434,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
 #endif
 #if defined(PIPE_ARCH_PPC)
-MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
+MAttrs.push_back(util_get_cpu_caps()->has_altivec ? "+altivec" : "-altivec");
 #if (LLVM_VERSION_MAJOR < 4)
 /*
 * Make sure VSX instructions are disabled
@@ -444,7 +444,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
 * https://llvm.org/bugs/show_bug.cgi?id=33531 (fixed in 4.0)
 * https://llvm.org/bugs/show_bug.cgi?id=34647 (llc performance on certain unusual shader IR; intro'd in 4.0, pending as of 5.0)
 */
-if (util_cpu_caps.has_altivec) {
+if (util_get_cpu_caps()->has_altivec) {
 MAttrs.push_back("-vsx");
 }
 #else
@@ -458,8 +458,8 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
 * Make sure VSX instructions are ENABLED (if supported), unless
 * VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 or 0.
 */
-if (util_cpu_caps.has_altivec) {
-MAttrs.push_back(util_cpu_caps.has_vsx ? "+vsx" : "-vsx");
+if (util_get_cpu_caps()->has_altivec) {
+MAttrs.push_back(util_get_cpu_caps()->has_vsx ? "+vsx" : "-vsx");
 }
 #endif
 #endif

View file

@@ -322,7 +322,7 @@ lp_build_interleave2(struct gallivm_state *gallivm,
 {
 LLVMValueRef shuffle;
-if (type.length == 2 && type.width == 128 && util_cpu_caps.has_avx) {
+if (type.length == 2 && type.width == 128 && util_get_cpu_caps()->has_avx) {
 /*
 * XXX: This is a workaround for llvm code generation deficiency. Strangely
 * enough, while this needs vinsertf128/vextractf128 instructions (hence
@@ -484,7 +484,7 @@ lp_build_unpack2_native(struct gallivm_state *gallivm,
 /* Interleave bits */
 #if UTIL_ARCH_LITTLE_ENDIAN
-if (src_type.length * src_type.width == 256 && util_cpu_caps.has_avx2) {
+if (src_type.length * src_type.width == 256 && util_get_cpu_caps()->has_avx2) {
 *dst_lo = lp_build_interleave2_half(gallivm, src_type, src, msb, 0);
 *dst_hi = lp_build_interleave2_half(gallivm, src_type, src, msb, 1);
 } else {
@@ -585,22 +585,22 @@ lp_build_pack2(struct gallivm_state *gallivm,
 assert(src_type.length * 2 == dst_type.length);
 /* Check for special cases first */
-if ((util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec) &&
+if ((util_get_cpu_caps()->has_sse2 || util_get_cpu_caps()->has_altivec) &&
 src_type.width * src_type.length >= 128) {
 const char *intrinsic = NULL;
 boolean swap_intrinsic_operands = FALSE;
 switch(src_type.width) {
 case 32:
-if (util_cpu_caps.has_sse2) {
+if (util_get_cpu_caps()->has_sse2) {
 if (dst_type.sign) {
 intrinsic = "llvm.x86.sse2.packssdw.128";
 } else {
-if (util_cpu_caps.has_sse4_1) {
+if (util_get_cpu_caps()->has_sse4_1) {
 intrinsic = "llvm.x86.sse41.packusdw";
 }
 }
-} else if (util_cpu_caps.has_altivec) {
+} else if (util_get_cpu_caps()->has_altivec) {
 if (dst_type.sign) {
 intrinsic = "llvm.ppc.altivec.vpkswss";
 } else {
@@ -613,18 +613,18 @@ lp_build_pack2(struct gallivm_state *gallivm,
 break;
 case 16:
 if (dst_type.sign) {
-if (util_cpu_caps.has_sse2) {
+if (util_get_cpu_caps()->has_sse2) {
 intrinsic = "llvm.x86.sse2.packsswb.128";
-} else if (util_cpu_caps.has_altivec) {
+} else if (util_get_cpu_caps()->has_altivec) {
 intrinsic = "llvm.ppc.altivec.vpkshss";
 #if UTIL_ARCH_LITTLE_ENDIAN
 swap_intrinsic_operands = TRUE;
 #endif
 }
 } else {
-if (util_cpu_caps.has_sse2) {
+if (util_get_cpu_caps()->has_sse2) {
 intrinsic = "llvm.x86.sse2.packuswb.128";
-} else if (util_cpu_caps.has_altivec) {
+} else if (util_get_cpu_caps()->has_altivec) {
 intrinsic = "llvm.ppc.altivec.vpkshus";
 #if UTIL_ARCH_LITTLE_ENDIAN
 swap_intrinsic_operands = TRUE;
@@ -740,7 +740,7 @@ lp_build_pack2_native(struct gallivm_state *gallivm,
 /* At this point only have special case for avx2 */
 if (src_type.length * src_type.width == 256 &&
-util_cpu_caps.has_avx2) {
+util_get_cpu_caps()->has_avx2) {
 switch(src_type.width) {
 case 32:
 if (dst_type.sign) {
@@ -793,7 +793,7 @@ lp_build_packs2(struct gallivm_state *gallivm,
 /* All X86 SSE non-interleaved pack instructions take signed inputs and
 * saturate them, so no need to clamp for those cases. */
-if(util_cpu_caps.has_sse2 &&
+if(util_get_cpu_caps()->has_sse2 &&
 src_type.width * src_type.length >= 128 &&
 src_type.sign &&
 (src_type.width == 32 || src_type.width == 16))

View file

@@ -1152,7 +1152,7 @@ lp_build_minify(struct lp_build_context *bld,
 LLVMValueRef size;
 assert(bld->type.sign);
 if (lod_scalar ||
-(util_cpu_caps.has_avx2 || !util_cpu_caps.has_sse)) {
+(util_get_cpu_caps()->has_avx2 || !util_get_cpu_caps()->has_sse)) {
 size = LLVMBuildLShr(builder, base_size, level, "minify");
 size = lp_build_max(bld, size, bld->one);
 }

View file

@@ -3235,7 +3235,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
 * as it appears to be a loss with just AVX)
 */
 if (num_quads == 1 || !use_aos ||
-(util_cpu_caps.has_avx2 &&
+(util_get_cpu_caps()->has_avx2 &&
 (bld.num_lods == 1 ||
 derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) {
 if (use_aos) {

View file

@@ -35,10 +35,10 @@
 DEBUG_GET_ONCE_BOOL_OPTION(nosse, "GALLIUM_NOSSE", false);
-static struct util_cpu_caps *get_cpu_caps(void)
+static const struct util_cpu_caps_t *get_cpu_caps(void)
 {
 util_cpu_detect();
-return &util_cpu_caps;
+return util_get_cpu_caps();
 }
 int rtasm_cpu_has_sse(void)

View file

@@ -2152,17 +2152,17 @@ static void x86_init_func_common( struct x86_function *p )
 {
 util_cpu_detect();
 p->caps = 0;
-if(util_cpu_caps.has_mmx)
+if(util_get_cpu_caps()->has_mmx)
 p->caps |= X86_MMX;
-if(util_cpu_caps.has_mmx2)
+if(util_get_cpu_caps()->has_mmx2)
 p->caps |= X86_MMX2;
-if(util_cpu_caps.has_sse)
+if(util_get_cpu_caps()->has_sse)
 p->caps |= X86_SSE;
-if(util_cpu_caps.has_sse2)
+if(util_get_cpu_caps()->has_sse2)
 p->caps |= X86_SSE2;
-if(util_cpu_caps.has_sse3)
+if(util_get_cpu_caps()->has_sse3)
 p->caps |= X86_SSE3;
-if(util_cpu_caps.has_sse4_1)
+if(util_get_cpu_caps()->has_sse4_1)
 p->caps |= X86_SSE4_1;
 p->csr = p->store;
 #if defined(PIPE_ARCH_X86)

View file

@@ -2200,8 +2200,8 @@ tc_set_context_param(struct pipe_context *_pipe,
 if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) {
 /* Pin the gallium thread as requested. */
 util_set_thread_affinity(tc->queue.threads[0],
-util_cpu_caps.L3_affinity_mask[value],
-NULL, util_cpu_caps.num_cpu_mask_bits);
+util_get_cpu_caps()->L3_affinity_mask[value],
+NULL, util_get_cpu_caps()->num_cpu_mask_bits);
 /* Execute this immediately (without enqueuing).
 * It's required to be thread-safe.
@@ -2982,7 +2982,7 @@ threaded_context_create(struct pipe_context *pipe,
 util_cpu_detect();
-if (!debug_get_bool_option("GALLIUM_THREAD", util_cpu_caps.nr_cpus > 1))
+if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1))
 return pipe;
 tc = os_malloc_aligned(sizeof(struct threaded_context), 16);

View file

@@ -436,7 +436,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
 assert(type.length <= 16);
 assert(type.floating);
-if(util_cpu_caps.has_sse && type.length == 4) {
+if(util_get_cpu_caps()->has_sse && type.length == 4) {
 const char *movmskintr = "llvm.x86.sse.movmsk.ps";
 const char *popcntintr = "llvm.ctpop.i32";
 LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
@@ -447,7 +447,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
 LLVMInt32TypeInContext(context), bits);
 count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
 }
-else if(util_cpu_caps.has_avx && type.length == 8) {
+else if(util_get_cpu_caps()->has_avx && type.length == 8) {
 const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
 const char *popcntintr = "llvm.ctpop.i32";
 LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,

View file

@@ -930,7 +930,7 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
 screen->allow_cl = !!getenv("LP_CL");
 screen->use_tgsi = (LP_DEBUG & DEBUG_TGSI_IR);
-screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0;
+screen->num_threads = util_get_cpu_caps()->nr_cpus > 1 ? util_get_cpu_caps()->nr_cpus : 0;
 #ifdef EMBEDDED_DEVICE
 screen->num_threads = 0;
 #endif

View file

@@ -403,7 +403,7 @@ flush_denorm_to_zero(float val)
 fi_val.f = val;
 #if defined(PIPE_ARCH_SSE)
-if (util_cpu_caps.has_sse) {
+if (util_get_cpu_caps()->has_sse) {
 if ((fi_val.ui & 0x7f800000) == 0) {
 fi_val.ui &= 0xff800000;
 }
@@ -479,7 +479,7 @@ test_unary(unsigned verbose, FILE *fp, const struct unary_test_t *test, unsigned
 continue;
 }
-if (!util_cpu_caps.has_neon &&
+if (!util_get_cpu_caps()->has_neon &&
 test->ref == &nearbyintf && length == 2 &&
 ref != roundf(testval)) {
 /* FIXME: The generic (non SSE) path in lp_build_iround, which is

View file

@@ -86,7 +86,7 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
 * of a block for all formats) though this should not be strictly necessary
 * neither. In any case it can only affect compressed or 1d textures.
 */
-unsigned mip_align = MAX2(64, util_cpu_caps.cacheline);
+unsigned mip_align = MAX2(64, util_get_cpu_caps()->cacheline);
 assert(LP_MAX_TEXTURE_2D_LEVELS <= LP_MAX_TEXTURE_LEVELS);
 assert(LP_MAX_TEXTURE_3D_LEVELS <= LP_MAX_TEXTURE_LEVELS);
@@ -124,7 +124,7 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
 if (util_format_is_compressed(pt->format))
 lpr->row_stride[level] = nblocksx * block_size;
 else
-lpr->row_stride[level] = align(nblocksx * block_size, util_cpu_caps.cacheline);
+lpr->row_stride[level] = align(nblocksx * block_size, util_get_cpu_caps()->cacheline);
 /* if row_stride * height > LP_MAX_TEXTURE_SIZE */
 if ((uint64_t)lpr->row_stride[level] * nblocksy > LP_MAX_TEXTURE_SIZE) {

View file

@@ -91,7 +91,7 @@ swr_create_screen(struct sw_winsys *winsys)
 util_cpu_detect();
-if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) {
+if (util_get_cpu_caps()->has_avx512f && util_get_cpu_caps()->has_avx512er) {
 swr_print_info("SWR detected KNL instruction support ");
 #ifndef HAVE_SWR_KNL
 swr_print_info("(skipping: not built).\n");
@@ -103,7 +103,7 @@ swr_create_screen(struct sw_winsys *winsys)
 #endif
 }
-if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512bw) {
+if (util_get_cpu_caps()->has_avx512f && util_get_cpu_caps()->has_avx512bw) {
 swr_print_info("SWR detected SKX instruction support ");
 #ifndef HAVE_SWR_SKX
 swr_print_info("(skipping not built).\n");
@@ -113,7 +113,7 @@ swr_create_screen(struct sw_winsys *winsys)
 #endif
 }
-if (util_cpu_caps.has_avx2) {
+if (util_get_cpu_caps()->has_avx2) {
 swr_print_info("SWR detected AVX2 instruction support ");
 #ifndef HAVE_SWR_AVX2
 swr_print_info("(skipping not built).\n");
@@ -123,7 +123,7 @@ swr_create_screen(struct sw_winsys *winsys)
 #endif
 }
-if (util_cpu_caps.has_avx) {
+if (util_get_cpu_caps()->has_avx) {
 swr_print_info("SWR detected AVX instruction support ");
 #ifndef HAVE_SWR_AVX
 swr_print_info("(skipping not built).\n");

View file

@@ -90,7 +90,7 @@ vc4_load_lt_image(void *dst, uint32_t dst_stride,
 int cpp, const struct pipe_box *box)
 {
 #ifdef USE_ARM_ASM
-if (util_cpu_caps.has_neon) {
+if (util_get_cpu_caps()->has_neon) {
 vc4_load_lt_image_neon(dst, dst_stride, src, src_stride,
 cpp, box);
 return;
@@ -106,7 +106,7 @@ vc4_store_lt_image(void *dst, uint32_t dst_stride,
 int cpp, const struct pipe_box *box)
 {
 #ifdef USE_ARM_ASM
-if (util_cpu_caps.has_neon) {
+if (util_get_cpu_caps()->has_neon) {
 vc4_store_lt_image_neon(dst, dst_stride, src, src_stride,
 cpp, box);
 return;

View file

@@ -50,6 +50,7 @@ int main(int argc, char** argv)
 {
 struct translate *(*create_fn)(const struct translate_key *key) = 0;
+extern struct util_cpu_caps_t util_cpu_caps;
 struct translate_key key;
 unsigned output_format;
 unsigned input_format;
@@ -87,7 +88,7 @@ int main(int argc, char** argv)
 }
 else if (!strcmp(argv[1], "sse"))
 {
-if(!util_cpu_caps.has_sse || !rtasm_cpu_has_sse())
+if(!util_get_cpu_caps()->has_sse || !rtasm_cpu_has_sse())
 {
 printf("Error: CPU doesn't support SSE (test with qemu)\n");
 return 2;
@@ -99,7 +100,7 @@ int main(int argc, char** argv)
 }
 else if (!strcmp(argv[1], "sse2"))
 {
-if(!util_cpu_caps.has_sse2 || !rtasm_cpu_has_sse())
+if(!util_get_cpu_caps()->has_sse2 || !rtasm_cpu_has_sse())
 {
 printf("Error: CPU doesn't support SSE2 (test with qemu)\n");
 return 2;
@@ -110,7 +111,7 @@ int main(int argc, char** argv)
 }
 else if (!strcmp(argv[1], "sse3"))
 {
-if(!util_cpu_caps.has_sse3 || !rtasm_cpu_has_sse())
+if(!util_get_cpu_caps()->has_sse3 || !rtasm_cpu_has_sse())
 {
 printf("Error: CPU doesn't support SSE3 (test with qemu)\n");
 return 2;
@@ -120,7 +121,7 @@ int main(int argc, char** argv)
 }
 else if (!strcmp(argv[1], "sse4.1"))
 {
-if(!util_cpu_caps.has_sse4_1 || !rtasm_cpu_has_sse())
+if(!util_get_cpu_caps()->has_sse4_1 || !rtasm_cpu_has_sse())
 {
 printf("Error: CPU doesn't support SSE4.1 (test with qemu)\n");
 return 2;

View file

@@ -36,13 +36,14 @@ test(void)
 int
 main(int argc, char **argv)
 {
-assert(!util_cpu_caps.has_f16c);
+util_cpu_detect();
 test();
-/* Test f16c. */
-util_cpu_detect();
-if (util_cpu_caps.has_f16c)
+/* Test non-f16c. */
+if (util_get_cpu_caps()->has_f16c) {
+((struct util_cpu_caps_t *)util_get_cpu_caps())->has_f16c = false;
 test();
+}
 printf("Success!\n");
 return 0;
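
(The reworked test above relies on the const-cast escape hatch sketched earlier: detection runs first, test() covers the F16C path when available, and the flag is then cleared through the cast so the scalar fallback is exercised on the same machine.)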

View file

@@ -331,8 +331,8 @@ static void amdgpu_pin_threads_to_L3_cache(struct radeon_winsys *rws,
 struct amdgpu_winsys *ws = amdgpu_winsys(rws);
 util_set_thread_affinity(ws->cs_queue.threads[0],
-util_cpu_caps.L3_affinity_mask[cache],
-NULL, util_cpu_caps.num_cpu_mask_bits);
+util_get_cpu_caps()->L3_affinity_mask[cache],
+NULL, util_get_cpu_caps()->num_cpu_mask_bits);
 }
 static uint32_t kms_handle_hash(const void *key)

View file

@@ -809,8 +809,8 @@ static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws,
 
    if (util_queue_is_initialized(&rws->cs_queue)) {
       util_set_thread_affinity(rws->cs_queue.threads[0],
-                               util_cpu_caps.L3_affinity_mask[cache],
-                               NULL, util_cpu_caps.num_cpu_mask_bits);
+                               util_get_cpu_caps()->L3_affinity_mask[cache],
+                               NULL, util_get_cpu_caps()->num_cpu_mask_bits);
    }
 }

View file

@@ -216,18 +216,18 @@ _mesa_glthread_flush_batch(struct gl_context *ctx)
    /* Pin threads regularly to the same Zen CCX that the main thread is
     * running on. The main thread can move between CCXs.
     */
-   if (util_cpu_caps.nr_cpus != util_cpu_caps.cores_per_L3 &&
+   if (util_get_cpu_caps()->nr_cpus != util_get_cpu_caps()->cores_per_L3 &&
       /* driver support */
       ctx->Driver.PinDriverToL3Cache &&
       ++glthread->pin_thread_counter % 128 == 0) {
      int cpu = util_get_current_cpu();
 
      if (cpu >= 0) {
-        unsigned L3_cache = util_cpu_caps.cpu_to_L3[cpu];
+        unsigned L3_cache = util_get_cpu_caps()->cpu_to_L3[cpu];
 
         util_set_thread_affinity(glthread->queue.threads[0],
-                                 util_cpu_caps.L3_affinity_mask[L3_cache],
-                                 NULL, util_cpu_caps.num_cpu_mask_bits);
+                                 util_get_cpu_caps()->L3_affinity_mask[L3_cache],
+                                 NULL, util_get_cpu_caps()->num_cpu_mask_bits);
         ctx->Driver.PinDriverToL3Cache(ctx, L3_cache);
      }
   }
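
Note: the comment in this hunk carries the rationale -- on CPUs where
nr_cpus exceeds cores_per_L3 (multi-CCX Zen parts), a queue thread left
unpinned can land on a different L3 domain than the producer thread and pay
cross-CCX latency. A sketch of the re-pinning pattern, extracted from the
hunk above (repin_to_producer_L3 and its parameters are placeholders, not
Mesa API; assumes util_cpu_detect() has already run):

   static void
   repin_to_producer_L3(thrd_t queue_thread, unsigned *counter)
   {
      const struct util_cpu_caps_t *caps = util_get_cpu_caps();

      /* Only meaningful when the CPU has more than one L3 domain. */
      if (caps->nr_cpus == caps->cores_per_L3)
         return;

      /* Re-pin only every 128th call; changing affinity is not free. */
      if (++*counter % 128 != 0)
         return;

      int cpu = util_get_current_cpu();
      if (cpu >= 0) {
         unsigned L3 = caps->cpu_to_L3[cpu];
         util_set_thread_affinity(queue_thread, caps->L3_affinity_mask[L3],
                                  NULL, caps->num_cpu_mask_bits);
      }
   }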

View file

@@ -835,7 +835,7 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe,
                        !st->lower_ucp;
    st->shader_has_one_variant[MESA_SHADER_COMPUTE] = st->has_shareable_shaders;
 
-   if (util_cpu_caps.cores_per_L3 == util_cpu_caps.nr_cpus ||
+   if (util_get_cpu_caps()->cores_per_L3 == util_get_cpu_caps()->nr_cpus ||
       !st->pipe->set_context_param)
       st->pin_thread_counter = ST_L3_PINNING_DISABLED;

View file

@@ -117,7 +117,7 @@ prepare_draw(struct st_context *st, struct gl_context *ctx)
       int cpu = util_get_current_cpu();
       if (cpu >= 0) {
          struct pipe_context *pipe = st->pipe;
-         unsigned L3_cache = util_cpu_caps.cpu_to_L3[cpu];
+         unsigned L3_cache = util_get_cpu_caps()->cpu_to_L3[cpu];
 
          pipe->set_context_param(pipe,
                                  PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,

View file

@@ -59,7 +59,7 @@ static inline uint16_t
 _mesa_float_to_half(float val)
 {
 #if defined(USE_X86_64_ASM)
-   if (util_cpu_caps.has_f16c) {
+   if (util_get_cpu_caps()->has_f16c) {
       __m128 in = {val};
       __m128i out;
@@ -75,7 +75,7 @@ static inline float
 _mesa_half_to_float(uint16_t val)
 {
 #if defined(USE_X86_64_ASM)
-   if (util_cpu_caps.has_f16c) {
+   if (util_get_cpu_caps()->has_f16c) {
       __m128i in = {val};
       __m128 out;
@@ -90,7 +90,7 @@ static inline uint16_t
 _mesa_float_to_float16_rtz(float val)
 {
 #if defined(USE_X86_64_ASM)
-   if (util_cpu_caps.has_f16c) {
+   if (util_get_cpu_caps()->has_f16c) {
      __m128 in = {val};
      __m128i out;
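
Note: all three helpers share one shape -- check has_f16c at runtime, use
the hardware conversion when present, fall back to a software path
otherwise. The real code spells the conversion as inline asm behind
USE_X86_64_ASM, presumably so the file builds without -mf16c; a rough
sketch of the same dispatch written with the F16C intrinsics instead
(soft_float_to_half is a hypothetical stand-in for the software fallback,
and this variant does require compiling the file with -mf16c):

   #include <immintrin.h> /* _cvtss_sh()/_cvtsh_ss(), the F16C intrinsics */

   static inline uint16_t
   float_to_half_dispatch(float val)
   {
      /* Runtime dispatch: the branch tests the detected caps, not the
       * compile-time target, so one binary serves both CPU generations.
       */
      if (util_get_cpu_caps()->has_f16c)
         return _cvtss_sh(val, _MM_FROUND_TO_NEAREST_INT);
      return soft_float_to_half(val); /* hypothetical software fallback */
   }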

View file

@ -850,6 +850,8 @@ int main(int argc, char **argv)
{ {
boolean success; boolean success;
util_cpu_detect();
success = test_all(); success = test_all();
return success ? 0 : 1; return success ? 0 : 1;

View file

@@ -90,7 +90,7 @@
 
 DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", false)
 
-struct util_cpu_caps util_cpu_caps;
+struct util_cpu_caps_t util_cpu_caps;
 
 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 static int has_cpuid(void);
@@ -548,7 +548,7 @@ util_cpu_detect_once(void)
    {
       SYSTEM_INFO system_info;
       GetSystemInfo(&system_info);
-      util_cpu_caps.nr_cpus = system_info.dwNumberOfProcessors;
+      util_cpu_caps.nr_cpus = MAX2(1, system_info.dwNumberOfProcessors);
   }
 #elif defined(PIPE_OS_UNIX) && defined(_SC_NPROCESSORS_ONLN)
    util_cpu_caps.nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);

View file

@@ -55,7 +55,7 @@ enum cpu_family {
 
 typedef uint32_t util_affinity_mask[UTIL_MAX_CPUS / 32];
 
-struct util_cpu_caps {
+struct util_cpu_caps_t {
 
    int nr_cpus;
    enum cpu_family family;
@@ -105,8 +105,18 @@ struct util_cpu_caps {
    util_affinity_mask *L3_affinity_mask;
 };
 
-extern struct util_cpu_caps
-util_cpu_caps;
+static inline const struct util_cpu_caps_t *
+util_get_cpu_caps(void)
+{
+   extern struct util_cpu_caps_t util_cpu_caps;
+
+   /* If you hit this assert, it means that something is using the
+    * cpu-caps without having first called util_cpu_detect()
+    */
+   assert(util_cpu_caps.nr_cpus >= 1);
+
+   return &util_cpu_caps;
+}
 
 void util_cpu_detect(void);
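
Note: this accessor is the heart of the change. util_cpu_detect() always
leaves nr_cpus at 1 or more (see the MAX2() fix above), so the assert fires
in debug builds exactly when a caller reads the caps before detection has
run. A minimal sketch of the intended call pattern (create_screen is a
placeholder, not a real entry point):

   #include "util/u_cpu_detect.h"

   void
   create_screen(void)
   {
      /* Must run before the first util_get_cpu_caps() call; calling it
       * again later is cheap since detection only happens once.
       */
      util_cpu_detect();

      if (util_get_cpu_caps()->has_sse2) {
         /* ... select an SSE2 code path ... */
      }
   }

Returning a const pointer also turns accidental writes into compile errors,
which is why the u_half test above needs an explicit cast to clear has_f16c.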

View file

@@ -92,7 +92,7 @@ util_fpstate_get(void)
    unsigned mxcsr = 0;
 
 #if defined(PIPE_ARCH_SSE)
-   if (util_cpu_caps.has_sse) {
+   if (util_get_cpu_caps()->has_sse) {
       mxcsr = _mm_getcsr();
    }
 #endif
@@ -110,10 +110,10 @@ unsigned
 util_fpstate_set_denorms_to_zero(unsigned current_mxcsr)
 {
 #if defined(PIPE_ARCH_SSE)
-   if (util_cpu_caps.has_sse) {
+   if (util_get_cpu_caps()->has_sse) {
       /* Enable flush to zero mode */
      current_mxcsr |= _MM_FLUSH_ZERO_MASK;
 
-      if (util_cpu_caps.has_daz) {
+      if (util_get_cpu_caps()->has_daz) {
         /* Enable denormals are zero mode */
         current_mxcsr |= _MM_DENORMALS_ZERO_MASK;
      }
@@ -132,7 +132,7 @@ void
 util_fpstate_set(unsigned mxcsr)
 {
 #if defined(PIPE_ARCH_SSE)
-   if (util_cpu_caps.has_sse) {
+   if (util_get_cpu_caps()->has_sse) {
      _mm_setcsr(mxcsr);
    }
 #endif
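
Note: these three helpers are meant to be used as a save/modify/restore
triple -- denormal operands can slow SSE paths badly, so callers flush them
to zero for the duration of a kernel and then restore the original MXCSR.
A sketch of that pattern (run_kernel is a placeholder; assumes
util_cpu_detect() has already run on this thread):

   static void
   run_with_denorms_flushed(void (*run_kernel)(void))
   {
      unsigned saved = util_fpstate_get();

      /* Sets FTZ (and DAZ when the CPU supports it) and applies it. */
      util_fpstate_set_denorms_to_zero(saved);

      run_kernel();

      util_fpstate_set(saved); /* restore the caller's FP environment */
   }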

View file

@@ -267,7 +267,7 @@ util_queue_thread_func(void *input)
 
       util_cpu_detect();
       util_set_current_thread_affinity(mask, NULL,
-                                       util_cpu_caps.num_cpu_mask_bits);
+                                       util_get_cpu_caps()->num_cpu_mask_bits);
    }
 
 #if defined(__linux__)