mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-13 17:50:31 +01:00
util: clean up cpuinfo.* and its related files
_mesa_get_cpu_features is no longer needed, as all of its uses have been replaced with util_get_cpu_caps from u_cpu_detect.h. Signed-off-by: Yonggang Luo <luoyonggang@gmail.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19564>
This commit is contained in:
parent
7436669d55
commit
ed4fd1d90e
10 changed files with 0 additions and 1564 deletions
|
|
@ -87,7 +87,6 @@
|
|||
#include "bufferobj.h"
|
||||
#include "conservativeraster.h"
|
||||
#include "context.h"
|
||||
#include "cpuinfo.h"
|
||||
#include "debug.h"
|
||||
#include "debug_output.h"
|
||||
#include "depth.h"
|
||||
|
|
@ -217,7 +216,6 @@ one_time_init(const char *extensions_override)
|
|||
|
||||
_mesa_one_time_init_extension_overrides(extensions_override);
|
||||
|
||||
_mesa_get_cpu_features();
|
||||
|
||||
for (i = 0; i < 256; i++) {
|
||||
_mesa_ubyte_to_float_color_tab[i] = (float) i / 255.0F;
|
||||
|
|
|
|||
|
|
@ -1,94 +0,0 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "main/cpuinfo.h"
|
||||
|
||||
|
||||
/**
 * Run x86 CPU feature detection by calling _mesa_get_x86_features().
 *
 * Call this before using any of the "cpu_has_foo" macros.  When Mesa is
 * built without USE_X86_ASM and without USE_X86_64_ASM the function body
 * is empty.
 */
void
_mesa_get_cpu_features(void)
{
#if defined(USE_X86_ASM) || defined(USE_X86_64_ASM)
   _mesa_get_x86_features();
#endif
}
|
||||
|
||||
|
||||
/**
 * Append \p text to the NUL-terminated string held in \p buffer.
 *
 * Never writes beyond \p size bytes (terminator included); whatever part
 * of \p text does not fit is dropped.
 */
static void
cpu_string_append(char *buffer, size_t size, const char *text)
{
   const size_t used = strlen(buffer);

   if (used + 1 < size) {
      strncat(buffer, text, size - used - 1);
   }
}


/**
 * Return a string describing the CPU architecture and extensions that
 * Mesa is using (such as SSE or Altivec).
 *
 * The string is empty when Mesa was built without USE_X86_ASM and
 * without USE_SPARC_ASM.
 *
 * \return newly allocated information string, free it with free();
 *         NULL if the allocation fails
 */
char *
_mesa_get_cpu_string(void)
{
   /* Capacity of the returned buffer, terminator included. */
   enum { MAX_STRING = 50 };

   /* Each piece stays empty unless the build and the CPU enable it. */
   const char *arch = "";
   const char *mmx = "";
   const char *amd3dnow = "";
   const char *sse = "";
   char *buffer;

   buffer = malloc(MAX_STRING);
   if (!buffer)
      return NULL;

   buffer[0] = '\0';

#ifdef USE_X86_ASM

   if (_mesa_x86_cpu_features) {
      arch = "x86";
   }

#  ifdef USE_MMX_ASM
   if (cpu_has_mmx) {
      mmx = (cpu_has_mmxext) ? "/MMX+" : "/MMX";
   }
#  endif
#  ifdef USE_3DNOW_ASM
   if (cpu_has_3dnow) {
      amd3dnow = (cpu_has_3dnowext) ? "/3DNow!+" : "/3DNow!";
   }
#  endif
#  ifdef USE_SSE_ASM
   if (cpu_has_xmm) {
      sse = (cpu_has_xmm2) ? "/SSE2" : "/SSE";
   }
#  endif

#elif defined(USE_SPARC_ASM)

   arch = "SPARC";

#endif

   /* Bounded appends: the buffer can no longer be overrun, even in
    * builds where assert() is compiled out.
    */
   cpu_string_append(buffer, MAX_STRING, arch);
   cpu_string_append(buffer, MAX_STRING, mmx);
   cpu_string_append(buffer, MAX_STRING, amd3dnow);
   cpu_string_append(buffer, MAX_STRING, sse);

   return buffer;
}
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef CPUINFO_H
|
||||
#define CPUINFO_H
|
||||
|
||||
|
||||
#if defined USE_X86_ASM || defined USE_X86_64_ASM
|
||||
#include "x86/common_x86_asm.h"
|
||||
#endif
|
||||
|
||||
|
||||
extern void
|
||||
_mesa_get_cpu_features(void);
|
||||
|
||||
|
||||
extern char *
|
||||
_mesa_get_cpu_string(void);
|
||||
|
||||
|
||||
#endif /* CPUINFO_H */
|
||||
|
|
@ -39,7 +39,6 @@
|
|||
#include <mesa/main/conservativeraster.h>
|
||||
#include <mesa/main/consts_exts.h>
|
||||
#include <mesa/main/context.h>
|
||||
#include <mesa/main/cpuinfo.h>
|
||||
#include <mesa/main/dd.h>
|
||||
#include <mesa/main/debug.h>
|
||||
#include <mesa/main/debug_output.h>
|
||||
|
|
|
|||
|
|
@ -58,8 +58,6 @@ files_libmesa = files(
|
|||
'main/context.c',
|
||||
'main/context.h',
|
||||
'main/copyimage.c',
|
||||
'main/cpuinfo.c',
|
||||
'main/cpuinfo.h',
|
||||
'main/dd.h',
|
||||
'main/debug.c',
|
||||
'main/debug.h',
|
||||
|
|
@ -389,19 +387,13 @@ files_libmesa = files(
|
|||
'vbo/vbo_save_draw.c',
|
||||
'vbo/vbo_save_loopback.c',
|
||||
'vbo/vbo_util.h',
|
||||
'x86/common_x86.c',
|
||||
)
|
||||
|
||||
inc_libmesa_asm = []
|
||||
if with_asm_arch == 'x86'
|
||||
files_libmesa += files(
|
||||
'x86/assyntax.h',
|
||||
'x86/common_x86_asm.S',
|
||||
'x86/common_x86_asm.h',
|
||||
'x86/common_x86_features.h',
|
||||
'x86/read_rgba_span_x86.S',
|
||||
)
|
||||
inc_libmesa_asm = include_directories('x86')
|
||||
endif
|
||||
|
||||
format_fallback_c = custom_target(
|
||||
|
|
|
|||
|
|
@ -1,360 +0,0 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file common_x86.c
|
||||
*
|
||||
* Check CPU capabilities & initialize optimized functions for this particular
|
||||
* processor.
|
||||
*
|
||||
* Changed by Andre Werthmann for using the new SSE functions.
|
||||
*
|
||||
* \author Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
|
||||
* \author Andre Werthmann <wertmann@cs.uni-potsdam.de>
|
||||
*/
|
||||
|
||||
#if defined(USE_SSE_ASM) && defined(__FreeBSD__)
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
#if defined(USE_SSE_ASM) && (defined(__OpenBSD__) || defined(__NetBSD__))
|
||||
#include <sys/param.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <machine/cpu.h>
|
||||
#endif
|
||||
#if defined(USE_X86_64_ASM)
|
||||
#include <cpuid.h>
|
||||
#if !defined(bit_SSE4_1) && defined(bit_SSE41)
|
||||
/* XXX: clang defines bit_SSE41 instead of bit_SSE4_1 */
|
||||
#define bit_SSE4_1 bit_SSE41
|
||||
#elif !defined(bit_SSE4_1) && !defined(bit_SSE41)
|
||||
#define bit_SSE4_1 0x00080000
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "main/errors.h"
|
||||
|
||||
#include "common_x86_asm.h"
|
||||
|
||||
|
||||
/** Bitmask of X86_FEATURE_x bits */
|
||||
int _mesa_x86_cpu_features = 0x0;
|
||||
|
||||
static int detection_debug = GL_FALSE;
|
||||
|
||||
/* No reason for this to be public.
|
||||
*/
|
||||
extern GLuint _mesa_x86_has_cpuid(void);
|
||||
extern void _mesa_x86_cpuid(GLuint op, GLuint *reg_eax, GLuint *reg_ebx, GLuint *reg_ecx, GLuint *reg_edx);
|
||||
extern GLuint _mesa_x86_cpuid_eax(GLuint op);
|
||||
extern GLuint _mesa_x86_cpuid_ebx(GLuint op);
|
||||
extern GLuint _mesa_x86_cpuid_ecx(GLuint op);
|
||||
extern GLuint _mesa_x86_cpuid_edx(GLuint op);
|
||||
|
||||
|
||||
#if defined(USE_SSE_ASM)
|
||||
/*
|
||||
* We must verify that the Streaming SIMD Extensions are truly supported
|
||||
* on this processor before we go ahead and hook out the optimized code.
|
||||
*
|
||||
* However, I have been told by Alan Cox that all 2.4 (and later) Linux
|
||||
* kernels provide full SSE support on all processors that expose SSE via
|
||||
* the CPUID mechanism.
|
||||
*/
|
||||
|
||||
/* These are assembly functions: */
|
||||
extern void _mesa_test_os_sse_support( void );
|
||||
extern void _mesa_test_os_sse_exception_support( void );
|
||||
|
||||
|
||||
#if defined(_WIN32)
|
||||
#ifndef STATUS_FLOAT_MULTIPLE_TRAPS
|
||||
# define STATUS_FLOAT_MULTIPLE_TRAPS (0xC00002B5L)
|
||||
#endif
|
||||
/**
 * Exception filter that _mesa_check_os_sse_support() installs while it
 * runs the SSE probe routines on Windows.
 *
 * - EXCEPTION_ILLEGAL_INSTRUCTION: clears X86_FEATURE_XMM in
 *   _mesa_x86_cpu_features and resumes after the faulting instruction.
 * - STATUS_FLOAT_MULTIPLE_TRAPS: resumes after the faulting instruction.
 * - Anything else, or a context without control registers: returns
 *   EXCEPTION_EXECUTE_HANDLER.
 *
 * \param exp  exception record and thread context supplied by the OS
 * \return EXCEPTION_CONTINUE_EXECUTION or EXCEPTION_EXECUTE_HANDLER
 */
static LONG WINAPI ExceptionFilter(LPEXCEPTION_POINTERS exp)
{
   PEXCEPTION_RECORD rec = exp->ExceptionRecord;
   PCONTEXT ctx = exp->ContextRecord;

   if ( rec->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION ) {
      _mesa_debug(NULL, "EXCEPTION_ILLEGAL_INSTRUCTION\n" );
      _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
   } else if ( rec->ExceptionCode == STATUS_FLOAT_MULTIPLE_TRAPS ) {
      _mesa_debug(NULL, "STATUS_FLOAT_MULTIPLE_TRAPS\n");
      /* Windows seems to clear the exception flag itself, we just have to increment Eip */
   } else {
      /* The format string has a %08x conversion, so the exception code
       * must actually be passed.
       */
      _mesa_debug(NULL, "UNEXPECTED EXCEPTION (0x%08x), terminating!\n",
                  (unsigned int) rec->ExceptionCode);
      return EXCEPTION_EXECUTE_HANDLER;
   }

   if ( (ctx->ContextFlags & CONTEXT_CONTROL) != CONTEXT_CONTROL ) {
      _mesa_debug(NULL, "Context does not contain control registers, terminating!\n");
      return EXCEPTION_EXECUTE_HANDLER;
   }

   /* Step over the faulting instruction.
    * NOTE(review): assumes the probe instruction is 3 bytes long - confirm
    * against the encodings of the XORPS/DIVPS probes.
    */
   ctx->Eip += 3;

   return EXCEPTION_CONTINUE_EXECUTION;
}
|
||||
#endif /* _WIN32 */
|
||||
|
||||
|
||||
/**
|
||||
* Check if SSE is supported.
|
||||
* If not, turn off the X86_FEATURE_XMM flag in _mesa_x86_cpu_features.
|
||||
*/
|
||||
void _mesa_check_os_sse_support( void )
|
||||
{
|
||||
#if defined(__FreeBSD__)
|
||||
{
|
||||
int ret, enabled;
|
||||
unsigned int len;
|
||||
len = sizeof(enabled);
|
||||
ret = sysctlbyname("hw.instruction_sse", &enabled, &len, NULL, 0);
|
||||
if (ret || !enabled)
|
||||
_mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
|
||||
}
|
||||
#elif defined (__NetBSD__)
|
||||
{
|
||||
int ret, enabled;
|
||||
size_t len = sizeof(enabled);
|
||||
ret = sysctlbyname("machdep.sse", &enabled, &len, (void *)NULL, 0);
|
||||
if (ret || !enabled)
|
||||
_mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
|
||||
}
|
||||
#elif defined(__OpenBSD__)
|
||||
{
|
||||
int mib[2];
|
||||
int ret, enabled;
|
||||
size_t len = sizeof(enabled);
|
||||
|
||||
mib[0] = CTL_MACHDEP;
|
||||
mib[1] = CPU_SSE;
|
||||
|
||||
ret = sysctl(mib, 2, &enabled, &len, NULL, 0);
|
||||
if (ret || !enabled)
|
||||
_mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
|
||||
}
|
||||
#elif defined(_WIN32)
|
||||
LPTOP_LEVEL_EXCEPTION_FILTER oldFilter;
|
||||
|
||||
/* Install our ExceptionFilter */
|
||||
oldFilter = SetUnhandledExceptionFilter( ExceptionFilter );
|
||||
|
||||
if ( cpu_has_xmm ) {
|
||||
_mesa_debug(NULL, "Testing OS support for SSE...\n");
|
||||
|
||||
_mesa_test_os_sse_support();
|
||||
|
||||
if ( cpu_has_xmm ) {
|
||||
_mesa_debug(NULL, "Yes.\n");
|
||||
} else {
|
||||
_mesa_debug(NULL, "No!\n");
|
||||
}
|
||||
}
|
||||
|
||||
if ( cpu_has_xmm ) {
|
||||
_mesa_debug(NULL, "Testing OS support for SSE unmasked exceptions...\n");
|
||||
|
||||
_mesa_test_os_sse_exception_support();
|
||||
|
||||
if ( cpu_has_xmm ) {
|
||||
_mesa_debug(NULL, "Yes.\n");
|
||||
} else {
|
||||
_mesa_debug(NULL, "No!\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* Restore previous exception filter */
|
||||
SetUnhandledExceptionFilter( oldFilter );
|
||||
|
||||
if ( cpu_has_xmm ) {
|
||||
_mesa_debug(NULL, "Tests of OS support for SSE passed.\n");
|
||||
} else {
|
||||
_mesa_debug(NULL, "Tests of OS support for SSE failed!\n");
|
||||
}
|
||||
#else
|
||||
/* Do nothing on other platforms for now.
|
||||
*/
|
||||
if (detection_debug)
|
||||
_mesa_debug(NULL, "Not testing OS support for SSE, leaving enabled.\n");
|
||||
#endif /* __FreeBSD__ */
|
||||
}
|
||||
|
||||
#endif /* USE_SSE_ASM */
|
||||
|
||||
|
||||
/**
|
||||
* Initialize the _mesa_x86_cpu_features bitfield.
|
||||
* This is a no-op if called more than once.
|
||||
*/
|
||||
void
|
||||
_mesa_get_x86_features(void)
|
||||
{
|
||||
static int called = 0;
|
||||
|
||||
if (called)
|
||||
return;
|
||||
|
||||
called = 1;
|
||||
|
||||
#ifdef USE_X86_ASM
|
||||
_mesa_x86_cpu_features = 0x0;
|
||||
|
||||
if (getenv( "MESA_NO_ASM")) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!_mesa_x86_has_cpuid()) {
|
||||
_mesa_debug(NULL, "CPUID not detected\n");
|
||||
}
|
||||
else {
|
||||
GLuint cpu_features, cpu_features_ecx;
|
||||
GLuint cpu_ext_features;
|
||||
GLuint cpu_ext_info;
|
||||
char cpu_vendor[13];
|
||||
GLuint result;
|
||||
|
||||
/* get vendor name */
|
||||
_mesa_x86_cpuid(0, &result, (GLuint *)(cpu_vendor + 0), (GLuint *)(cpu_vendor + 8), (GLuint *)(cpu_vendor + 4));
|
||||
cpu_vendor[12] = '\0';
|
||||
|
||||
if (detection_debug)
|
||||
_mesa_debug(NULL, "CPU vendor: %s\n", cpu_vendor);
|
||||
|
||||
/* get cpu features */
|
||||
cpu_features = _mesa_x86_cpuid_edx(1);
|
||||
cpu_features_ecx = _mesa_x86_cpuid_ecx(1);
|
||||
|
||||
if (cpu_features & X86_CPU_FPU)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_FPU;
|
||||
if (cpu_features & X86_CPU_CMOV)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_CMOV;
|
||||
|
||||
#ifdef USE_MMX_ASM
|
||||
if (cpu_features & X86_CPU_MMX)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_MMX;
|
||||
#endif
|
||||
|
||||
#ifdef USE_SSE_ASM
|
||||
if (cpu_features & X86_CPU_XMM)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_XMM;
|
||||
if (cpu_features & X86_CPU_XMM2)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_XMM2;
|
||||
if (cpu_features_ecx & X86_CPU_SSE4_1)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_SSE4_1;
|
||||
#endif
|
||||
|
||||
/* query extended cpu features */
|
||||
if ((cpu_ext_info = _mesa_x86_cpuid_eax(0x80000000)) > 0x80000000) {
|
||||
if (cpu_ext_info >= 0x80000001) {
|
||||
|
||||
cpu_ext_features = _mesa_x86_cpuid_edx(0x80000001);
|
||||
|
||||
if (cpu_features & X86_CPU_MMX) {
|
||||
|
||||
#ifdef USE_3DNOW_ASM
|
||||
if (cpu_ext_features & X86_CPUEXT_3DNOW)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_3DNOW;
|
||||
if (cpu_ext_features & X86_CPUEXT_3DNOW_EXT)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_3DNOWEXT;
|
||||
#endif
|
||||
|
||||
#ifdef USE_MMX_ASM
|
||||
if (cpu_ext_features & X86_CPUEXT_MMX_EXT)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_MMXEXT;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* query cpu name */
|
||||
if (cpu_ext_info >= 0x80000002) {
|
||||
GLuint ofs;
|
||||
char cpu_name[49];
|
||||
for (ofs = 0; ofs < 3; ofs++)
|
||||
_mesa_x86_cpuid(0x80000002+ofs, (GLuint *)(cpu_name + (16*ofs)+0), (GLuint *)(cpu_name + (16*ofs)+4), (GLuint *)(cpu_name + (16*ofs)+8), (GLuint *)(cpu_name + (16*ofs)+12));
|
||||
cpu_name[48] = '\0'; /* the name should be NULL terminated, but just to be sure */
|
||||
|
||||
if (detection_debug)
|
||||
_mesa_debug(NULL, "CPU name: %s\n", cpu_name);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#ifdef USE_MMX_ASM
|
||||
if ( cpu_has_mmx ) {
|
||||
if ( getenv( "MESA_NO_MMX" ) == 0 ) {
|
||||
if (detection_debug)
|
||||
_mesa_debug(NULL, "MMX cpu detected.\n");
|
||||
} else {
|
||||
_mesa_x86_cpu_features &= ~(X86_FEATURE_MMX);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_3DNOW_ASM
|
||||
if ( cpu_has_3dnow ) {
|
||||
if ( getenv( "MESA_NO_3DNOW" ) == 0 ) {
|
||||
if (detection_debug)
|
||||
_mesa_debug(NULL, "3DNow! cpu detected.\n");
|
||||
} else {
|
||||
_mesa_x86_cpu_features &= ~(X86_FEATURE_3DNOW);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_SSE_ASM
|
||||
if ( cpu_has_xmm ) {
|
||||
if ( getenv( "MESA_NO_SSE" ) == 0 ) {
|
||||
if (detection_debug)
|
||||
_mesa_debug(NULL, "SSE cpu detected.\n");
|
||||
if ( getenv( "MESA_FORCE_SSE" ) == 0 ) {
|
||||
_mesa_check_os_sse_support();
|
||||
}
|
||||
} else {
|
||||
_mesa_debug(NULL, "SSE cpu detected, but switched off by user.\n");
|
||||
_mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#elif defined(USE_X86_64_ASM)
|
||||
{
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
|
||||
/* Always available on x86-64. */
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_XMM | X86_FEATURE_XMM2;
|
||||
|
||||
if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
|
||||
return;
|
||||
|
||||
if (ecx & bit_SSE4_1)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_SSE4_1;
|
||||
}
|
||||
#endif /* USE_X86_64_ASM */
|
||||
|
||||
(void) detection_debug;
|
||||
}
|
||||
|
|
@ -1,219 +0,0 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Check extended CPU capabilities. Now just returns the raw CPUID
|
||||
* feature information, allowing the higher level code to interpret the
|
||||
* results.
|
||||
*
|
||||
* Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
|
||||
*
|
||||
* Cleaned up and simplified by Gareth Hughes <gareth@valinux.com>
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially
|
||||
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
|
||||
* in there will break the build on some platforms.
|
||||
*/
|
||||
|
||||
#include "assyntax.h"
|
||||
#include "common_x86_features.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
/* GLuint _mesa_x86_has_cpuid(void)
 *
 * Returns 1 in EAX when the CPUID instruction is available and 0 when it
 * is not.  The previous version XORed EAX with 0xff after SETNE, which
 * left AL as 0xff or 0xfe and so never produced zero.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_x86_has_cpuid)
HIDDEN(_mesa_x86_has_cpuid)
GLNAME(_mesa_x86_has_cpuid):
	_CET_ENDBR
	/* Test for the CPUID command.  If the ID Flag bit in EFLAGS
	 * (bit 21) is writable, the CPUID command is present */
	PUSHF_L
	POP_L	(EAX)
	MOV_L	(EAX, ECX)
	XOR_L	(CONST(0x00200000), EAX)
	PUSH_L	(EAX)
	POPF_L
	PUSHF_L
	POP_L	(EAX)

	/* Verify the ID Flag bit has been written: AL becomes 1 when the
	 * flags read back differ from the original copy in ECX, 0 otherwise.
	 * The mask then discards the EFLAGS bits still held in the upper
	 * bytes of EAX. */
	CMP_L	(ECX, EAX)
	SETNE	(AL)
	AND_L	(CONST(0xff), EAX)

	RET
|
||||
|
||||
|
||||
/* void _mesa_x86_cpuid(op, *reg_eax, *reg_ebx, *reg_ecx, *reg_edx)
 *
 * Executes CPUID with EAX = op and stores the resulting EAX, EBX, ECX
 * and EDX values through the four pointer arguments.  EDI and EBX are
 * saved on entry and restored before returning.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_x86_cpuid)
|
||||
HIDDEN(_mesa_x86_cpuid)
|
||||
GLNAME(_mesa_x86_cpuid):
|
||||
_CET_ENDBR
|
||||
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */
|
||||
PUSH_L (EDI)
|
||||
PUSH_L (EBX)
|
||||
|
||||
CPUID
|
||||
|
||||
/* The two pushes above moved ESP down by 8 bytes, so the four pointer
 * arguments are now found at 16, 20, 24 and 28 bytes from ESP. */
MOV_L (REGOFF(16, ESP), EDI) /* *eax */
|
||||
MOV_L (EAX, REGIND(EDI))
|
||||
MOV_L (REGOFF(20, ESP), EDI) /* *ebx */
|
||||
MOV_L (EBX, REGIND(EDI))
|
||||
MOV_L (REGOFF(24, ESP), EDI) /* *ecx */
|
||||
MOV_L (ECX, REGIND(EDI))
|
||||
MOV_L (REGOFF(28, ESP), EDI) /* *edx */
|
||||
MOV_L (EDX, REGIND(EDI))
|
||||
|
||||
POP_L (EBX)
|
||||
POP_L (EDI)
|
||||
RET
|
||||
|
||||
/* GLuint _mesa_x86_cpuid_eax(op)
 *
 * Executes CPUID with EAX = op and returns with the EAX result left
 * untouched in EAX.  EBX is saved and restored around the instruction.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_x86_cpuid_eax)
|
||||
HIDDEN(_mesa_x86_cpuid_eax)
|
||||
GLNAME(_mesa_x86_cpuid_eax):
|
||||
_CET_ENDBR
|
||||
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */
|
||||
PUSH_L (EBX)
|
||||
|
||||
CPUID
|
||||
|
||||
POP_L (EBX)
|
||||
RET
|
||||
|
||||
/* GLuint _mesa_x86_cpuid_ebx(op)
 *
 * Executes CPUID with EAX = op and returns the EBX result in EAX.
 * The caller's EBX is saved before CPUID and restored afterwards.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_x86_cpuid_ebx)
|
||||
HIDDEN(_mesa_x86_cpuid_ebx)
|
||||
GLNAME(_mesa_x86_cpuid_ebx):
|
||||
_CET_ENDBR
|
||||
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */
|
||||
PUSH_L (EBX)
|
||||
|
||||
CPUID
|
||||
MOV_L (EBX, EAX) /* return EBX */
|
||||
|
||||
POP_L (EBX)
|
||||
RET
|
||||
|
||||
/* GLuint _mesa_x86_cpuid_ecx(op)
 *
 * Executes CPUID with EAX = op and returns the ECX result in EAX.
 * EBX is saved and restored around the instruction.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_x86_cpuid_ecx)
|
||||
HIDDEN(_mesa_x86_cpuid_ecx)
|
||||
GLNAME(_mesa_x86_cpuid_ecx):
|
||||
_CET_ENDBR
|
||||
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */
|
||||
PUSH_L (EBX)
|
||||
|
||||
CPUID
|
||||
MOV_L (ECX, EAX) /* return ECX */
|
||||
|
||||
POP_L (EBX)
|
||||
RET
|
||||
|
||||
/* GLuint _mesa_x86_cpuid_edx(op)
 *
 * Executes CPUID with EAX = op and returns the EDX result in EAX.
 * EBX is saved and restored around the instruction.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_x86_cpuid_edx)
|
||||
HIDDEN(_mesa_x86_cpuid_edx)
|
||||
GLNAME(_mesa_x86_cpuid_edx):
|
||||
_CET_ENDBR
|
||||
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */
|
||||
PUSH_L (EBX)
|
||||
|
||||
CPUID
|
||||
MOV_L (EDX, EAX) /* return EDX */
|
||||
|
||||
POP_L (EBX)
|
||||
RET
|
||||
|
||||
#ifdef USE_SSE_ASM
|
||||
/* Execute an SSE instruction to see if the operating system correctly
|
||||
* supports SSE. A signal handler for SIGILL should have been set
|
||||
* before calling this function, otherwise this could kill the client
|
||||
* application.
|
||||
*
|
||||
* -----> !!!! ATTENTION DEVELOPERS !!!! <-----
|
||||
*
|
||||
* If you're debugging with gdb and you get stopped in this function,
|
||||
* just type 'continue'! Execution will proceed normally.
|
||||
* See freedesktop.org bug #1709 for more info.
|
||||
*/
|
||||
/* void _mesa_test_os_sse_support(void)
 *
 * SSE probe: the routine consists of one SSE instruction followed by a
 * return.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME( _mesa_test_os_sse_support )
|
||||
HIDDEN(_mesa_test_os_sse_support)
|
||||
GLNAME( _mesa_test_os_sse_support ):
|
||||
_CET_ENDBR
|
||||
/* The probe instruction itself. */
XORPS ( XMM0, XMM0 )
|
||||
|
||||
RET
|
||||
|
||||
|
||||
/* Perform an SSE divide-by-zero to see if the operating system
|
||||
* correctly supports unmasked SIMD FPU exceptions. Signal handlers for
|
||||
* SIGILL and SIGFPE should have been set before calling this function,
|
||||
* otherwise this could kill the client application.
|
||||
*/
|
||||
/* void _mesa_test_os_sse_exception_support(void)
 *
 * Sets up a frame with 8 bytes of scratch space, saves MXCSR, loads a
 * modified MXCSR, divides a vector of 1.0 values by a zeroed register,
 * then reloads the saved MXCSR and returns.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME( _mesa_test_os_sse_exception_support )
|
||||
HIDDEN(_mesa_test_os_sse_exception_support)
|
||||
GLNAME( _mesa_test_os_sse_exception_support ):
|
||||
_CET_ENDBR
|
||||
PUSH_L ( EBP )
|
||||
MOV_L ( ESP, EBP )
|
||||
SUB_L ( CONST( 8 ), ESP )
|
||||
|
||||
/* Save the original MXCSR register value.
|
||||
*/
|
||||
STMXCSR ( REGOFF( -4, EBP ) )
|
||||
|
||||
/* Unmask the divide-by-zero exception and perform one.
|
||||
*/
|
||||
STMXCSR ( REGOFF( -8, EBP ) )
|
||||
/* 0xfffffdff clears bit 9 of the second MXCSR copy before it is loaded. */
AND_L ( CONST( 0xfffffdff ), REGOFF( -8, EBP ) )
|
||||
LDMXCSR ( REGOFF( -8, EBP ) )
|
||||
|
||||
XORPS ( XMM0, XMM0 )
|
||||
|
||||
/* Build four packed 1.0 values (0x3f800000 each) on the stack. */
PUSH_L ( CONST( 0x3f800000 ) )
|
||||
PUSH_L ( CONST( 0x3f800000 ) )
|
||||
PUSH_L ( CONST( 0x3f800000 ) )
|
||||
PUSH_L ( CONST( 0x3f800000 ) )
|
||||
|
||||
MOVUPS ( REGIND( ESP ), XMM1 )
|
||||
|
||||
/* XMM1 (all 1.0) divided by XMM0 (all zero). */
DIVPS ( XMM0, XMM1 )
|
||||
|
||||
/* Restore the original MXCSR register value.
|
||||
*/
|
||||
LDMXCSR ( REGOFF( -4, EBP ) )
|
||||
|
||||
/* LEAVE reloads ESP from EBP, which also discards the scratch area and
 * the 16 bytes pushed above. */
LEAVE
|
||||
RET
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Check CPU capabilities & initialize optimized functions for this particular
|
||||
* processor.
|
||||
*
|
||||
* Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
|
||||
* Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the
|
||||
* new SSE functions
|
||||
*
|
||||
* Reimplemented by Gareth Hughes in a more
|
||||
* future-proof manner, based on code in the Linux kernel.
|
||||
*/
|
||||
|
||||
#ifndef __COMMON_X86_ASM_H__
|
||||
#define __COMMON_X86_ASM_H__
|
||||
|
||||
/* Do not reference mtypes.h from this file.
|
||||
*/
|
||||
#include "common_x86_features.h"
|
||||
|
||||
extern int _mesa_x86_cpu_features;
|
||||
|
||||
extern void _mesa_get_x86_features(void);
|
||||
|
||||
extern void _mesa_check_os_sse_support(void);
|
||||
|
||||
extern void _mesa_init_all_x86_transform_asm( void );
|
||||
|
||||
#endif
|
||||
|
|
@ -1,97 +0,0 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* x86 CPUID feature information. The feature bits are gathered by
|
||||
* _mesa_get_x86_features() into _mesa_x86_cpu_features and tested with the
|
||||
* cpu_has_* helper macros.
|
||||
*
|
||||
* Gareth Hughes
|
||||
*/
|
||||
|
||||
#ifndef __COMMON_X86_FEATURES_H__
|
||||
#define __COMMON_X86_FEATURES_H__
|
||||
|
||||
#define X86_FEATURE_FPU (1<<0)
|
||||
#define X86_FEATURE_CMOV (1<<1)
|
||||
#define X86_FEATURE_MMXEXT (1<<2)
|
||||
#define X86_FEATURE_MMX (1<<3)
|
||||
#define X86_FEATURE_FXSR (1<<4)
|
||||
#define X86_FEATURE_XMM (1<<5)
|
||||
#define X86_FEATURE_XMM2 (1<<6)
|
||||
#define X86_FEATURE_3DNOWEXT (1<<7)
|
||||
#define X86_FEATURE_3DNOW (1<<8)
|
||||
#define X86_FEATURE_SSE4_1 (1<<9)
|
||||
|
||||
/* standard X86 CPU features */
|
||||
#define X86_CPU_FPU (1<<0)
|
||||
#define X86_CPU_CMOV (1<<15)
|
||||
#define X86_CPU_MMX (1<<23)
|
||||
#define X86_CPU_XMM (1<<25)
|
||||
#define X86_CPU_XMM2 (1<<26)
|
||||
/* ECX. */
|
||||
#define X86_CPU_SSE4_1 (1<<19)
|
||||
|
||||
/* extended X86 CPU features */
|
||||
#define X86_CPUEXT_MMX_EXT (1<<22)
|
||||
#define X86_CPUEXT_3DNOW_EXT (1<<30)
|
||||
#define X86_CPUEXT_3DNOW (1<<31)
|
||||
|
||||
#ifdef __MMX__
|
||||
#define cpu_has_mmx 1
|
||||
#else
|
||||
#define cpu_has_mmx (_mesa_x86_cpu_features & X86_FEATURE_MMX)
|
||||
#endif
|
||||
|
||||
#define cpu_has_mmxext (_mesa_x86_cpu_features & X86_FEATURE_MMXEXT)
|
||||
|
||||
#if defined(__SSE__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1)) || defined(_M_X64)
|
||||
#define cpu_has_xmm 1
|
||||
#else
|
||||
#define cpu_has_xmm (_mesa_x86_cpu_features & X86_FEATURE_XMM)
|
||||
#endif
|
||||
|
||||
#if defined(__SSE2__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(_M_X64)
|
||||
#define cpu_has_xmm2 1
|
||||
#else
|
||||
#define cpu_has_xmm2 (_mesa_x86_cpu_features & X86_FEATURE_XMM2)
|
||||
#endif
|
||||
|
||||
#ifdef __3dNOW__
|
||||
#define cpu_has_3dnow 1
|
||||
#else
|
||||
#define cpu_has_3dnow (_mesa_x86_cpu_features & X86_FEATURE_3DNOW)
|
||||
#endif
|
||||
|
||||
#define cpu_has_3dnowext (_mesa_x86_cpu_features & X86_FEATURE_3DNOWEXT)
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
#define cpu_has_sse4_1 1
|
||||
#else
|
||||
#define cpu_has_sse4_1 (_mesa_x86_cpu_features & X86_FEATURE_SSE4_1)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -1,687 +0,0 @@
|
|||
/*
|
||||
* (C) Copyright IBM Corporation 2004
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file read_rgba_span_x86.S
|
||||
* Optimized routines to transfer pixel data from the framebuffer to a
|
||||
* buffer in main memory.
|
||||
*
|
||||
* \author Ian Romanick <idr@us.ibm.com>
|
||||
*/
|
||||
/* Control flow enforcement support */
|
||||
/* With HAVE_CET_H set, _CET_ENDBR is expected to come from <cet.h>.
 * Otherwise it is defined empty, so the function entry points in this file
 * that start with _CET_ENDBR still assemble, just without that marker.
 */
#ifdef HAVE_CET_H
|
||||
#include <cet.h>
|
||||
#else
|
||||
#define _CET_ENDBR
|
||||
#endif
|
||||
|
||||
.file "read_rgba_span_x86.S"
|
||||
#if !defined(__MINGW32__) && !defined(__APPLE__) /* this one cries for assyntax.h */
|
||||
/* Kevin F. Quinn 2nd July 2006
|
||||
* Replaced data segment constants with text-segment instructions.
|
||||
*/
|
||||
/* LOAD_MASK(mvins, m1, m2)
 *
 * Builds the two byte-select masks on the stack and loads each one with the
 * caller-chosen load instruction `mvins`:
 *   m1 <- repeated 0xff00ff00  (bytes 1 and 3 of every 32-bit pixel)
 *   m2 <- repeated 0x00ff0000  (byte 2 of every 32-bit pixel)
 * Four dwords (16 bytes) of each pattern are pushed, so the macro works with
 * an 8-byte load (movq into an MMX register) as well as a 16-byte load
 * (movdqu into an XMM register); both uses occur in this file.
 * The closing addl releases all 32 pushed bytes, so %esp -- and with it every
 * %esp-relative argument offset -- is the same after the macro as before it.
 */
#define LOAD_MASK(mvins,m1,m2) \
|
||||
pushl $0xff00ff00 ;\
|
||||
pushl $0xff00ff00 ;\
|
||||
pushl $0xff00ff00 ;\
|
||||
pushl $0xff00ff00 ;\
|
||||
mvins (%esp), m1 ;\
|
||||
pushl $0x00ff0000 ;\
|
||||
pushl $0x00ff0000 ;\
|
||||
pushl $0x00ff0000 ;\
|
||||
pushl $0x00ff0000 ;\
|
||||
mvins (%esp), m2 ;\
|
||||
addl $32, %esp
|
||||
|
||||
/* I implemented these as macros because they appear in several places,
|
||||
* and I've tweaked them a number of times. I got tired of changing every
|
||||
* place they appear. :)
|
||||
*/
|
||||
|
||||
/* DO_ONE_PIXEL(): convert one 32-bit pixel with integer instructions only.
 *
 * Loads the dword at (%ebx), exchanges its bytes 0 and 2 (a full byte swap
 * followed by an 8-bit rotate right puts bytes 1 and 3 back where they
 * started), stores the result at (%ecx) and advances both pointers by 4.
 * Clobbers %eax and the arithmetic flags.
 */
#define DO_ONE_PIXEL() \
|
||||
movl (%ebx), %eax ; \
|
||||
addl $4, %ebx ; \
|
||||
bswap %eax /* ARGB -> BGRA */ ; \
|
||||
rorl $8, %eax /* BGRA -> ABGR */ ; \
|
||||
movl %eax, (%ecx) /* ABGR -> R, G, B, A */ ; \
|
||||
addl $4, %ecx
|
||||
|
||||
/* DO_ONE_LAST_PIXEL(): same conversion as DO_ONE_PIXEL() -- bytes 0 and 2 of
 * the dword at (%ebx) are exchanged and the result is stored at (%ecx) --
 * but neither pointer is advanced, so it is only suitable for the final
 * pixel of a span.  Clobbers %eax.
 */
#define DO_ONE_LAST_PIXEL() \
|
||||
movl (%ebx), %eax ; \
|
||||
bswap %eax /* ARGB -> BGRA */ ; \
|
||||
rorl $8, %eax /* BGRA -> ABGR */ ; \
|
||||
movl %eax, (%ecx) /* ABGR -> R, G, B, A */ ;
|
||||
|
||||
|
||||
/**
 * BGRA8888_REV -> RGBA span copy, MMX flavour.
 *
 * Stack arguments, in order: source pointer, destination pointer, number
 * of pixels.  For every 32-bit pixel, bytes 0 and 2 are exchanged while
 * bytes 1 and 3 are copied through.  A count <= 0 returns immediately.
 *
 * \warning
 * EMMS is only executed here when USE_INNER_EMMS is defined; otherwise the
 * caller is responsible for issuing it at the correct places.
 */

	.globl _generic_read_RGBA_span_BGRA8888_REV_MMX
	.hidden _generic_read_RGBA_span_BGRA8888_REV_MMX
	.type _generic_read_RGBA_span_BGRA8888_REV_MMX, @function
_generic_read_RGBA_span_BGRA8888_REV_MMX:
	_CET_ENDBR
	pushl	%ebx

#ifdef USE_INNER_EMMS
	emms
#endif
	/* %mm1 = 0xff00ff00 pattern (kept bytes), %mm2 = 0x00ff0000 pattern. */
	LOAD_MASK(movq,%mm1,%mm2)

	/* One register was pushed, so the first argument sits at 8(%esp). */
	movl	8(%esp), %ebx		/* src */
	movl	16(%esp), %edx		/* pixel count */
	movl	12(%esp), %ecx		/* dst */

	testl	%edx, %edx
	jle	.Lbgra_mmx_done		/* nothing to convert */

	/* %eax = 1 when the source address is an odd multiple of 4, i.e. one
	 * scalar pixel is needed before the 8-byte loads (this presumes the
	 * source is at least 4-byte aligned).
	 */
	movl	%ebx, %eax

	negl	%eax
	sarl	$2, %eax
	andl	$1, %eax
	je	.Lbgra_mmx_pairs

	subl	%eax, %edx
	DO_ONE_PIXEL()
.Lbgra_mmx_pairs:

	/* Two pixels per iteration; %eax counts the pairs.  The flags that
	 * the jne at the loop test consumes come from this shrl on entry and
	 * from the subl at the bottom of the loop afterwards.
	 */
	movl	%edx, %eax
	shrl	%eax
	jmp	.Lbgra_mmx_pair_test
.Lbgra_mmx_pair_loop:
	movq	(%ebx), %mm0
	addl	$8, %ebx

	/* No byte shuffle is available, so bytes 0 and 2 of each pixel are
	 * exchanged with masks and 16-bit shifts.
	 */
	movq	%mm0, %mm3
	movq	%mm0, %mm4

	pand	%mm2, %mm3		/* isolate byte 2 */
	psllq	$16, %mm4		/* byte 0 moves up to byte 2 */
	psrlq	$16, %mm3		/* byte 2 moves down to byte 0 */
	pand	%mm2, %mm4		/* drop everything that crossed over */

	pand	%mm1, %mm0		/* bytes 1 and 3 stay put */
	por	%mm4, %mm3
	por	%mm3, %mm0

	movq	%mm0, (%ecx)
	addl	$8, %ecx
	subl	$1, %eax
.Lbgra_mmx_pair_test:
	jne	.Lbgra_mmx_pair_loop

#ifdef USE_INNER_EMMS
	emms
#endif

	/* Zero or one pixel is left; bit 0 of the remaining count decides. */
	testl	$1, %edx
	je	.Lbgra_mmx_done

	DO_ONE_LAST_PIXEL()

.Lbgra_mmx_done:
	popl	%ebx
	ret
	.size _generic_read_RGBA_span_BGRA8888_REV_MMX, .-_generic_read_RGBA_span_BGRA8888_REV_MMX
|
||||
|
||||
|
||||
/**
 * BGRA8888_REV -> RGBA span copy, SSE flavour.
 *
 * Stack arguments, in order: source pointer, destination pointer, number
 * of pixels.  SSE is used only for the aligned 16-byte fetch from the
 * source; the byte exchange itself is still done in MMX registers, so the
 * gain over the MMX routine is small.
 *
 * \todo
 * Measure whether this routine is worth keeping next to the MMX one.
 *
 * \warning
 * EMMS is only executed here when USE_INNER_EMMS is defined; otherwise the
 * caller is responsible for issuing it at the correct places.
 */

	.globl _generic_read_RGBA_span_BGRA8888_REV_SSE
	.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE
	.type _generic_read_RGBA_span_BGRA8888_REV_SSE, @function
_generic_read_RGBA_span_BGRA8888_REV_SSE:
	_CET_ENDBR
	pushl	%esi
	pushl	%ebx
	pushl	%ebp

#ifdef USE_INNER_EMMS
	emms
#endif

	/* %mm1 = 0xff00ff00 pattern (kept bytes), %mm2 = 0x00ff0000 pattern. */
	LOAD_MASK(movq,%mm1,%mm2)

	/* Three registers were pushed, so the first argument is at 16(%esp). */
	movl	16(%esp), %ebx		/* src */
	movl	24(%esp), %edx		/* pixel count */
	movl	20(%esp), %ecx		/* dst */

	testl	%edx, %edx
	jle	.Lbgra_sse_done		/* nothing to convert */

	/* Carve out a 16-byte aligned scratch slot; %ebp remembers the
	 * original stack pointer until the main loop has finished.
	 */
	movl	%esp, %ebp
	subl	$16, %esp
	andl	$0xfffffff0, %esp

	/* %esi = min(pixels needed to reach a 16-byte aligned source, count);
	 * those are converted first, the rest stays in %edx.
	 */
	movl	%ebx, %eax
	movl	%edx, %esi

	negl	%eax
	andl	$15, %eax
	sarl	$2, %eax
	cmpl	%edx, %eax
	cmovle	%eax, %esi

	subl	%esi, %edx

	/* Leading pixels: one scalar pixel if bit 0 is set ... */
	testl	$1, %esi
	je	.Lbgra_sse_lead_pair

	DO_ONE_PIXEL()
.Lbgra_sse_lead_pair:

	/* ... then one MMX pair if bit 1 is set. */
	testl	$2, %esi
	je	.Lbgra_sse_quads

	movq	(%ebx), %mm0
	addl	$8, %ebx

	movq	%mm0, %mm3
	movq	%mm0, %mm4

	pand	%mm2, %mm3
	psllq	$16, %mm4
	psrlq	$16, %mm3
	pand	%mm2, %mm4

	pand	%mm1, %mm0
	por	%mm4, %mm3
	por	%mm3, %mm0

	movq	%mm0, (%ecx)
	addl	$8, %ecx
.Lbgra_sse_quads:

	/* Four pixels per iteration; %eax counts the groups.  The jne at the
	 * loop test consumes the flags of this shrl on entry and of the subl
	 * at the bottom of the loop afterwards.
	 */
	movl	%edx, %eax
	shrl	$2, %eax
	jmp	.Lbgra_sse_quad_test
.Lbgra_sse_quad_loop:
	movaps	(%ebx), %xmm0
	addl	$16, %ebx

	/* Plain SSE has no direct XMM -> MMX register move, so the 16 bytes
	 * are bounced through the aligned stack slot and picked up as two
	 * 8-byte halves.
	 */
	movaps	%xmm0, (%esp)
	movq	(%esp), %mm0
	movq	8(%esp), %mm5

	/* Same mask-and-shift byte exchange as the pair code, interleaved
	 * for both halves (%mm0 with %mm3/%mm4, %mm5 with %mm6/%mm7).
	 */
	movq	%mm0, %mm3
	movq	%mm0, %mm4
	movq	%mm5, %mm6
	movq	%mm5, %mm7

	pand	%mm2, %mm3
	pand	%mm2, %mm6

	psllq	$16, %mm4
	psllq	$16, %mm7

	psrlq	$16, %mm3
	psrlq	$16, %mm6

	pand	%mm2, %mm4
	pand	%mm2, %mm7

	pand	%mm1, %mm0
	pand	%mm1, %mm5

	por	%mm4, %mm3
	por	%mm7, %mm6

	por	%mm3, %mm0
	por	%mm6, %mm5

	movq	%mm0, (%ecx)
	movq	%mm5, 8(%ecx)
	addl	$16, %ecx

	subl	$1, %eax
.Lbgra_sse_quad_test:
	jne	.Lbgra_sse_quad_loop

	/* NOTE(review): when USE_INNER_EMMS is defined, this EMMS runs before
	 * the two-pixel tail below, which still uses MMX registers.
	 */
#ifdef USE_INNER_EMMS
	emms
#endif
	movl	%ebp, %esp		/* release the scratch slot */

	/* Between 0 and 3 pixels remain: a pair first, then a single one. */
	testl	$2, %edx
	je	.Lbgra_sse_tail_one

	movq	(%ebx), %mm0
	addl	$8, %ebx

	movq	%mm0, %mm3
	movq	%mm0, %mm4

	pand	%mm2, %mm3
	psllq	$16, %mm4
	psrlq	$16, %mm3
	pand	%mm2, %mm4

	pand	%mm1, %mm0
	por	%mm4, %mm3
	por	%mm3, %mm0

	movq	%mm0, (%ecx)
	addl	$8, %ecx
.Lbgra_sse_tail_one:

	testl	$1, %edx
	je	.Lbgra_sse_done

	DO_ONE_LAST_PIXEL()
.Lbgra_sse_done:
	popl	%ebp
	popl	%ebx
	popl	%esi
	ret
	.size _generic_read_RGBA_span_BGRA8888_REV_SSE, .-_generic_read_RGBA_span_BGRA8888_REV_SSE
|
||||
|
||||
|
||||
/**
 * BGRA8888_REV -> RGBA span copy, SSE2 flavour.
 *
 * Stack arguments, in order: source pointer, destination pointer, number
 * of pixels.  For every 32-bit pixel, bytes 0 and 2 are exchanged while
 * bytes 1 and 3 are copied through.  Only XMM and integer registers are
 * used, so no EMMS is involved.
 */

	.text
	.globl _generic_read_RGBA_span_BGRA8888_REV_SSE2
	.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE2
	.type _generic_read_RGBA_span_BGRA8888_REV_SSE2, @function
_generic_read_RGBA_span_BGRA8888_REV_SSE2:
	_CET_ENDBR
	pushl	%esi
	pushl	%ebx

	/* %xmm1 = 0xff00ff00 pattern (kept bytes), %xmm2 = 0x00ff0000 pattern. */
	LOAD_MASK(movdqu,%xmm1,%xmm2)

	/* Two registers were pushed, so the first argument is at 12(%esp). */
	movl	12(%esp), %ebx		/* src */
	movl	20(%esp), %edx		/* pixel count */
	movl	16(%esp), %ecx		/* dst */

	movl	%ebx, %eax
	movl	%edx, %esi

	testl	%edx, %edx
	jle	.Lbgra_sse2_done	/* nothing to convert */

	/* The aligned 16-byte fetch in the main loop needs a source address
	 * that is a multiple of 16.  %esi = min(pixels needed to get there,
	 * count); those are converted the slow way first.
	 */
	negl	%eax
	andl	$15, %eax
	sarl	$2, %eax

	cmpl	%edx, %eax
	cmovbe	%eax, %esi
	subl	%esi, %edx

	/* Leading pixels: one scalar pixel if bit 0 is set ... */
	testl	$1, %esi
	je	.Lbgra_sse2_lead_pair

	DO_ONE_PIXEL()
.Lbgra_sse2_lead_pair:
	/* ... then one 8-byte pair if bit 1 is set. */
	testl	$2, %esi
	je	.Lbgra_sse2_quads

	movq	(%ebx), %xmm0
	addl	$8, %ebx

	movdqa	%xmm0, %xmm3
	movdqa	%xmm0, %xmm4
	andps	%xmm1, %xmm0		/* bytes 1 and 3 stay put */

	andps	%xmm2, %xmm3		/* isolate byte 2 */
	pslldq	$2, %xmm4		/* byte 0 moves up to byte 2 */
	psrldq	$2, %xmm3		/* byte 2 moves down to byte 0 */
	andps	%xmm2, %xmm4		/* drop everything that crossed over */

	orps	%xmm4, %xmm3
	orps	%xmm3, %xmm0

	movq	%xmm0, (%ecx)
	addl	$8, %ecx
.Lbgra_sse2_quads:

	/* Four pixels per iteration; %eax counts the groups.  The store uses
	 * movdqu because nothing is known about the destination alignment; a
	 * variant for a 16-byte aligned destination could use movdqa instead.
	 * The jne at the loop test consumes the flags of this shrl on entry
	 * and of the subl at the bottom of the loop afterwards.
	 */
	movl	%edx, %eax
	shrl	$2, %eax
	jmp	.Lbgra_sse2_quad_test
.Lbgra_sse2_quad_loop:
	movdqa	(%ebx), %xmm0
	addl	$16, %ebx

	movdqa	%xmm0, %xmm3
	movdqa	%xmm0, %xmm4
	andps	%xmm1, %xmm0

	andps	%xmm2, %xmm3
	pslldq	$2, %xmm4
	psrldq	$2, %xmm3
	andps	%xmm2, %xmm4

	orps	%xmm4, %xmm3
	orps	%xmm3, %xmm0

	movdqu	%xmm0, (%ecx)
	addl	$16, %ecx
	subl	$1, %eax
.Lbgra_sse2_quad_test:
	jne	.Lbgra_sse2_quad_loop

	/* Up to 3 pixels remain.  The pair goes first, while the source
	 * pointer is still aligned, then the single pixel.
	 */
	testl	$2, %edx
	je	.Lbgra_sse2_tail_one

	movq	(%ebx), %xmm0
	addl	$8, %ebx

	movdqa	%xmm0, %xmm3
	movdqa	%xmm0, %xmm4
	andps	%xmm1, %xmm0

	andps	%xmm2, %xmm3
	pslldq	$2, %xmm4
	psrldq	$2, %xmm3
	andps	%xmm2, %xmm4

	orps	%xmm4, %xmm3
	orps	%xmm3, %xmm0

	movq	%xmm0, (%ecx)
	addl	$8, %ecx
.Lbgra_sse2_tail_one:

	testl	$1, %edx
	je	.Lbgra_sse2_done

	DO_ONE_LAST_PIXEL()
.Lbgra_sse2_done:

	popl	%ebx
	popl	%esi
	ret
	.size _generic_read_RGBA_span_BGRA8888_REV_SSE2, .-_generic_read_RGBA_span_BGRA8888_REV_SSE2
|
||||
|
||||
|
||||
|
||||
/* Constants for the RGB565 -> RGBA routine.  Each *_L / *_H pair is the low
 * and high dword of one 64-bit MMX constant, i.e. four 16-bit lanes
 * (lane 0 = low word of *_L ... lane 3 = high word of *_H).
 *
 * MASK_565 selects one colour field per lane: 0xf800, 0x07e0, 0x001f, and 0
 * for the fourth lane.
 */
#define MASK_565_L 0x07e0f800
|
||||
#define MASK_565_H 0x0000001f
|
||||
/* Setting SCALE_ADJUST to 5 gives a perfect match with the
|
||||
* classic C implementation in Mesa. Setting SCALE_ADJUST
|
||||
* to 0 is slightly faster but at a small cost to accuracy.
|
||||
*/
|
||||
#define SCALE_ADJUST 5
|
||||
/* PRESCALE holds the per-lane multipliers applied to the masked fields and
 * SCALE the per-lane factors for the following high-half multiply; the two
 * sets below must be kept in step with SCALE_ADJUST.
 */
#if SCALE_ADJUST == 5
|
||||
#define PRESCALE_L 0x00100001
|
||||
#define PRESCALE_H 0x00000200
|
||||
#define SCALE_L 0x40C620E8
|
||||
#define SCALE_H 0x0000839d
|
||||
#elif SCALE_ADJUST == 0
|
||||
#define PRESCALE_L 0x00200001
|
||||
#define PRESCALE_H 0x00000800
|
||||
#define SCALE_L 0x01040108
|
||||
#define SCALE_H 0x00000108
|
||||
#else
|
||||
#error SCALE_ADJUST must either be 5 or 0.
|
||||
#endif
|
||||
/* ALPHA puts 0x00ff into lane 3 only; it is OR-ed into every converted pixel
 * so the alpha byte always comes out as 0xff.
 */
#define ALPHA_L 0x00000000
|
||||
#define ALPHA_H 0x00ff0000
|
||||
|
||||
/**
 * MMX optimized version of the RGB565 to RGBA copy routine.
 *
 * Stack arguments, in order: source pointer (16-bit RGB565 pixels),
 * destination pointer (4 bytes R, G, B, A per pixel), number of pixels.
 * Alpha is always written as 0xff.  A negative count returns without
 * touching memory.
 *
 * EMMS is only executed here when USE_INNER_EMMS is defined.
 */

	.text
	.globl _generic_read_RGBA_span_RGB565_MMX
	.hidden _generic_read_RGBA_span_RGB565_MMX
	.type _generic_read_RGBA_span_RGB565_MMX, @function

_generic_read_RGBA_span_RGB565_MMX:
	_CET_ENDBR
#ifdef USE_INNER_EMMS
	emms
#endif

	movl	4(%esp), %eax		/* source pointer */
	movl	8(%esp), %edx		/* destination pointer */
	movl	12(%esp), %ecx		/* number of pixels to copy */

	/* Build the four 64-bit constants on the stack (high dword pushed
	 * first) and load them:
	 *   %mm5 = field mask, %mm6 = prescale, %mm7 = scale, %mm3 = alpha.
	 * The addl releases all eight pushed dwords again, so the argument
	 * offsets above remain valid afterwards.
	 */
	pushl	$MASK_565_H
	pushl	$MASK_565_L
	movq	(%esp), %mm5
	pushl	$PRESCALE_H
	pushl	$PRESCALE_L
	movq	(%esp), %mm6
	pushl	$SCALE_H
	pushl	$SCALE_L
	movq	(%esp), %mm7
	pushl	$ALPHA_H
	pushl	$ALPHA_L
	movq	(%esp), %mm3
	addl	$32,%esp

	/* %ecx = number of 4-pixel groups.  The sign test uses js rather
	 * than jl: jl also looks at OF, which is undefined after a shift by
	 * more than one bit, whereas SF is always defined.  The jne at .L02
	 * consumes ZF from this same sarl on entry (jmp leaves the flags
	 * alone) and from the subl at the bottom of the loop afterwards.
	 */
	sarl	$2, %ecx
	js	.L01		/* Bail early if the count is negative. */
	jmp	.L02

.L03:
	/* Fetch 4 RGB565 pixels into %mm4.  Distribute the first and
	 * second pixels into the four words of %mm0 and %mm2.
	 */

	movq	(%eax), %mm4
	addl	$8, %eax

	pshufw	$0x00, %mm4, %mm0
	pshufw	$0x55, %mm4, %mm2


	/* Mask the pixels so that each word of each register contains only
	 * one color component.
	 */

	pand	%mm5, %mm0
	pand	%mm5, %mm2


	/* Adjust the component values so that they are as small as possible,
	 * but large enough so that we can multiply them by an unsigned 16-bit
	 * number and get a value as large as 0x00ff0000.
	 */

	pmullw	%mm6, %mm0
	pmullw	%mm6, %mm2
#if SCALE_ADJUST > 0
	psrlw	$SCALE_ADJUST, %mm0
	psrlw	$SCALE_ADJUST, %mm2
#endif

	/* Scale the input component values to be on the range
	 * [0, 0x00ff0000].  This is the real magic of the whole routine.
	 */

	pmulhuw	%mm7, %mm0
	pmulhuw	%mm7, %mm2


	/* Always set the alpha value to 0xff.
	 */

	por	%mm3, %mm0
	por	%mm3, %mm2


	/* Pack the 16-bit values to 8-bit values and store the converted
	 * pixel data.
	 */

	packuswb	%mm2, %mm0
	movq	%mm0, (%edx)
	addl	$8, %edx

	/* Same sequence for the third and fourth pixel of the group. */
	pshufw	$0xaa, %mm4, %mm0
	pshufw	$0xff, %mm4, %mm2

	pand	%mm5, %mm0
	pand	%mm5, %mm2
	pmullw	%mm6, %mm0
	pmullw	%mm6, %mm2
#if SCALE_ADJUST > 0
	psrlw	$SCALE_ADJUST, %mm0
	psrlw	$SCALE_ADJUST, %mm2
#endif
	pmulhuw	%mm7, %mm0
	pmulhuw	%mm7, %mm2

	por	%mm3, %mm0
	por	%mm3, %mm2

	packuswb	%mm2, %mm0

	movq	%mm0, (%edx)
	addl	$8, %edx

	subl	$1, %ecx
.L02:
	jne	.L03


	/* At this point there can be at most 3 pixels left to process.  If
	 * there are either 2 or 3 left, process 2.  The original count is
	 * reloaded because %ecx was used as the group counter.
	 */

	movl	12(%esp), %ecx
	testl	$0x02, %ecx
	je	.L04

	movd	(%eax), %mm4
	addl	$4, %eax

	pshufw	$0x00, %mm4, %mm0
	pshufw	$0x55, %mm4, %mm2

	pand	%mm5, %mm0
	pand	%mm5, %mm2
	pmullw	%mm6, %mm0
	pmullw	%mm6, %mm2
#if SCALE_ADJUST > 0
	psrlw	$SCALE_ADJUST, %mm0
	psrlw	$SCALE_ADJUST, %mm2
#endif
	pmulhuw	%mm7, %mm0
	pmulhuw	%mm7, %mm2

	por	%mm3, %mm0
	por	%mm3, %mm2

	packuswb	%mm2, %mm0

	movq	%mm0, (%edx)
	addl	$8, %edx

.L04:
	/* At this point there can be at most 1 pixel left to process.
	 * Process it if needed.
	 */

	testl	$0x01, %ecx
	je	.L01

	/* The count is no longer needed, so %ecx is reused for the pixel. */
	movzwl	(%eax), %ecx
	movd	%ecx, %mm4

	pshufw	$0x00, %mm4, %mm0

	pand	%mm5, %mm0
	pmullw	%mm6, %mm0
#if SCALE_ADJUST > 0
	psrlw	$SCALE_ADJUST, %mm0
#endif
	pmulhuw	%mm7, %mm0

	por	%mm3, %mm0

	packuswb	%mm0, %mm0

	movd	%mm0, (%edx)

.L01:
#ifdef USE_INNER_EMMS
	emms
#endif
	ret
	.size _generic_read_RGBA_span_RGB565_MMX, .-_generic_read_RGBA_span_RGB565_MMX
|
||||
#endif /* !defined(__MINGW32__) && !defined(__APPLE__) */
|
||||
|
||||
/* On ELF/Linux builds, emit an empty .note.GNU-stack section.  GNU toolchains
 * read this marker as "this object does not need an executable stack".
 */
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
||||
Loading…
Add table
Reference in a new issue