mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
gallivm,llvmpipe: Use 4-wide vectors on AMD Bulldozer.
8-wide vectors are slower. Reviewed-by: Roland Scheidegger <sroland@vmware.com>
This commit is contained in:
parent
9a31e090ef
commit
7eb5040197
3 changed files with 15 additions and 1 deletions
|
|
@ -434,8 +434,16 @@ lp_build_init(void)
|
|||
|
||||
util_cpu_detect();
|
||||
|
||||
/* AMD Bulldozer AVX's throughput is the same as SSE2; and because using
|
||||
* 8-wide vectors needs more floating-point ops than 4-wide (due to padding), it is
|
||||
* actually more efficient to use 4-wide vectors on this processor.
|
||||
*
|
||||
* See also:
|
||||
* - http://www.anandtech.com/show/4955/the-bulldozer-review-amd-fx8150-tested/2
|
||||
*/
|
||||
if (HAVE_AVX &&
|
||||
util_cpu_caps.has_avx) {
|
||||
util_cpu_caps.has_avx &&
|
||||
util_cpu_caps.has_intel) {
|
||||
lp_native_vector_width = 256;
|
||||
} else {
|
||||
/* Leave it at 128, even when no SIMD extensions are available.
|
||||
|
|
|
|||
|
|
@ -286,6 +286,11 @@ util_cpu_detect(void)
|
|||
util_cpu_caps.cacheline = cacheline;
|
||||
}
|
||||
|
||||
if (regs[1] == 0x756e6547 && regs[2] == 0x6c65746e && regs[3] == 0x49656e69) {
|
||||
/* GenuineIntel */
|
||||
util_cpu_caps.has_intel = 1;
|
||||
}
|
||||
|
||||
cpuid(0x80000000, regs);
|
||||
|
||||
if (regs[0] >= 0x80000001) {
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ struct util_cpu_caps {
|
|||
int x86_cpu_type;
|
||||
unsigned cacheline;
|
||||
|
||||
unsigned has_intel:1;
|
||||
unsigned has_tsc:1;
|
||||
unsigned has_mmx:1;
|
||||
unsigned has_mmx2:1;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue