From ed4fd1d90e3f3dca0ed0ac7a4a3366854c565f4b Mon Sep 17 00:00:00 2001 From: Yonggang Luo Date: Mon, 7 Nov 2022 11:00:21 +0800 Subject: [PATCH] util: cleanup cpuinfo.* and it's related files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _mesa_get_cpu_features is no more a needed thing as all it's usage are replaced with util_get_cpu_caps in u_cpu_detect.h Signed-off-by: Yonggang Luo Reviewed-by: Marek Olšák Part-of: --- src/mesa/main/context.c | 2 - src/mesa/main/cpuinfo.c | 94 --- src/mesa/main/cpuinfo.h | 43 -- src/mesa/main/tests/disable_windows_include.c | 1 - src/mesa/meson.build | 8 - src/mesa/x86/common_x86.c | 360 --------- src/mesa/x86/common_x86_asm.S | 219 ------ src/mesa/x86/common_x86_asm.h | 53 -- src/mesa/x86/common_x86_features.h | 97 --- src/mesa/x86/read_rgba_span_x86.S | 687 ------------------ 10 files changed, 1564 deletions(-) delete mode 100644 src/mesa/main/cpuinfo.c delete mode 100644 src/mesa/main/cpuinfo.h delete mode 100644 src/mesa/x86/common_x86.c delete mode 100644 src/mesa/x86/common_x86_asm.S delete mode 100644 src/mesa/x86/common_x86_asm.h delete mode 100644 src/mesa/x86/common_x86_features.h delete mode 100644 src/mesa/x86/read_rgba_span_x86.S diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 6ab069fbd52..eacce936b0c 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -87,7 +87,6 @@ #include "bufferobj.h" #include "conservativeraster.h" #include "context.h" -#include "cpuinfo.h" #include "debug.h" #include "debug_output.h" #include "depth.h" @@ -217,7 +216,6 @@ one_time_init(const char *extensions_override) _mesa_one_time_init_extension_overrides(extensions_override); - _mesa_get_cpu_features(); for (i = 0; i < 256; i++) { _mesa_ubyte_to_float_color_tab[i] = (float) i / 255.0F; diff --git a/src/mesa/main/cpuinfo.c b/src/mesa/main/cpuinfo.c deleted file mode 100644 index 1623a20892e..00000000000 --- a/src/mesa/main/cpuinfo.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 2009 VMware, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - - -#include -#include -#include - -#include "main/cpuinfo.h" - - -/** - * This function should be called before the various "cpu_has_foo" macros - * are used. - */ -void -_mesa_get_cpu_features(void) -{ -#if defined USE_X86_ASM || defined USE_X86_64_ASM - _mesa_get_x86_features(); -#endif -} - - -/** - * Return a string describing the CPU architexture and extensions that - * Mesa is using (such as SSE or Altivec). - * \return information string, free it with free() - */ -char * -_mesa_get_cpu_string(void) -{ -#define MAX_STRING 50 - char *buffer; - - buffer = malloc(MAX_STRING); - if (!buffer) - return NULL; - - buffer[0] = 0; - -#ifdef USE_X86_ASM - - if (_mesa_x86_cpu_features) { - strcat(buffer, "x86"); - } - -# ifdef USE_MMX_ASM - if (cpu_has_mmx) { - strcat(buffer, (cpu_has_mmxext) ? "/MMX+" : "/MMX"); - } -# endif -# ifdef USE_3DNOW_ASM - if (cpu_has_3dnow) { - strcat(buffer, (cpu_has_3dnowext) ? "/3DNow!+" : "/3DNow!"); - } -# endif -# ifdef USE_SSE_ASM - if (cpu_has_xmm) { - strcat(buffer, (cpu_has_xmm2) ? "/SSE2" : "/SSE"); - } -# endif - -#elif defined(USE_SPARC_ASM) - - strcat(buffer, "SPARC"); - -#endif - - assert(strlen(buffer) < MAX_STRING); - - return buffer; -} diff --git a/src/mesa/main/cpuinfo.h b/src/mesa/main/cpuinfo.h deleted file mode 100644 index 57925e82bf2..00000000000 --- a/src/mesa/main/cpuinfo.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 2009 VMware, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - - -#ifndef CPUINFO_H -#define CPUINFO_H - - -#if defined USE_X86_ASM || defined USE_X86_64_ASM -#include "x86/common_x86_asm.h" -#endif - - -extern void -_mesa_get_cpu_features(void); - - -extern char * -_mesa_get_cpu_string(void); - - -#endif /* CPUINFO_H */ diff --git a/src/mesa/main/tests/disable_windows_include.c b/src/mesa/main/tests/disable_windows_include.c index 9f40c7eac5e..8a1df65d110 100644 --- a/src/mesa/main/tests/disable_windows_include.c +++ b/src/mesa/main/tests/disable_windows_include.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include diff --git a/src/mesa/meson.build b/src/mesa/meson.build index dc48401da7b..86f30ab44f6 100644 --- a/src/mesa/meson.build +++ b/src/mesa/meson.build @@ -58,8 +58,6 @@ files_libmesa = files( 'main/context.c', 'main/context.h', 'main/copyimage.c', - 'main/cpuinfo.c', - 'main/cpuinfo.h', 'main/dd.h', 'main/debug.c', 'main/debug.h', @@ -389,19 +387,13 @@ files_libmesa = files( 'vbo/vbo_save_draw.c', 'vbo/vbo_save_loopback.c', 'vbo/vbo_util.h', - 'x86/common_x86.c', ) inc_libmesa_asm = [] if with_asm_arch == 'x86' files_libmesa += files( 'x86/assyntax.h', - 'x86/common_x86_asm.S', - 'x86/common_x86_asm.h', - 'x86/common_x86_features.h', - 'x86/read_rgba_span_x86.S', ) - inc_libmesa_asm = include_directories('x86') endif format_fallback_c = custom_target( diff --git a/src/mesa/x86/common_x86.c b/src/mesa/x86/common_x86.c deleted file mode 100644 index 56c357b60df..00000000000 --- a/src/mesa/x86/common_x86.c +++ /dev/null @@ -1,360 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file common_x86.c - * - * Check CPU capabilities & initialize optimized funtions for this particular - * processor. - * - * Changed by Andre Werthmann for using the new SSE functions. - * - * \author Holger Waechtler - * \author Andre Werthmann - */ - -#if defined(USE_SSE_ASM) && defined(__FreeBSD__) -#include -#include -#endif -#if defined(USE_SSE_ASM) && (defined(__OpenBSD__) || defined(__NetBSD__)) -#include -#include -#include -#endif -#if defined(USE_X86_64_ASM) -#include -#if !defined(bit_SSE4_1) && defined(bit_SSE41) -/* XXX: clang defines bit_SSE41 instead of bit_SSE4_1 */ -#define bit_SSE4_1 bit_SSE41 -#elif !defined(bit_SSE4_1) && !defined(bit_SSE41) -#define bit_SSE4_1 0x00080000 -#endif -#endif - -#include - -#include "main/errors.h" - -#include "common_x86_asm.h" - - -/** Bitmask of X86_FEATURE_x bits */ -int _mesa_x86_cpu_features = 0x0; - -static int detection_debug = GL_FALSE; - -/* No reason for this to be public. - */ -extern GLuint _mesa_x86_has_cpuid(void); -extern void _mesa_x86_cpuid(GLuint op, GLuint *reg_eax, GLuint *reg_ebx, GLuint *reg_ecx, GLuint *reg_edx); -extern GLuint _mesa_x86_cpuid_eax(GLuint op); -extern GLuint _mesa_x86_cpuid_ebx(GLuint op); -extern GLuint _mesa_x86_cpuid_ecx(GLuint op); -extern GLuint _mesa_x86_cpuid_edx(GLuint op); - - -#if defined(USE_SSE_ASM) -/* - * We must verify that the Streaming SIMD Extensions are truly supported - * on this processor before we go ahead and hook out the optimized code. - * - * However, I have been told by Alan Cox that all 2.4 (and later) Linux - * kernels provide full SSE support on all processors that expose SSE via - * the CPUID mechanism. - */ - -/* These are assembly functions: */ -extern void _mesa_test_os_sse_support( void ); -extern void _mesa_test_os_sse_exception_support( void ); - - -#if defined(_WIN32) -#ifndef STATUS_FLOAT_MULTIPLE_TRAPS -# define STATUS_FLOAT_MULTIPLE_TRAPS (0xC00002B5L) -#endif -static LONG WINAPI ExceptionFilter(LPEXCEPTION_POINTERS exp) -{ - PEXCEPTION_RECORD rec = exp->ExceptionRecord; - PCONTEXT ctx = exp->ContextRecord; - - if ( rec->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION ) { - _mesa_debug(NULL, "EXCEPTION_ILLEGAL_INSTRUCTION\n" ); - _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM); - } else if ( rec->ExceptionCode == STATUS_FLOAT_MULTIPLE_TRAPS ) { - _mesa_debug(NULL, "STATUS_FLOAT_MULTIPLE_TRAPS\n"); - /* Windows seems to clear the exception flag itself, we just have to increment Eip */ - } else { - _mesa_debug(NULL, "UNEXPECTED EXCEPTION (0x%08x), terminating!\n" ); - return EXCEPTION_EXECUTE_HANDLER; - } - - if ( (ctx->ContextFlags & CONTEXT_CONTROL) != CONTEXT_CONTROL ) { - _mesa_debug(NULL, "Context does not contain control registers, terminating!\n"); - return EXCEPTION_EXECUTE_HANDLER; - } - ctx->Eip += 3; - - return EXCEPTION_CONTINUE_EXECUTION; -} -#endif /* _WIN32 */ - - -/** - * Check if SSE is supported. - * If not, turn off the X86_FEATURE_XMM flag in _mesa_x86_cpu_features. - */ -void _mesa_check_os_sse_support( void ) -{ -#if defined(__FreeBSD__) - { - int ret, enabled; - unsigned int len; - len = sizeof(enabled); - ret = sysctlbyname("hw.instruction_sse", &enabled, &len, NULL, 0); - if (ret || !enabled) - _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM); - } -#elif defined (__NetBSD__) - { - int ret, enabled; - size_t len = sizeof(enabled); - ret = sysctlbyname("machdep.sse", &enabled, &len, (void *)NULL, 0); - if (ret || !enabled) - _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM); - } -#elif defined(__OpenBSD__) - { - int mib[2]; - int ret, enabled; - size_t len = sizeof(enabled); - - mib[0] = CTL_MACHDEP; - mib[1] = CPU_SSE; - - ret = sysctl(mib, 2, &enabled, &len, NULL, 0); - if (ret || !enabled) - _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM); - } -#elif defined(_WIN32) - LPTOP_LEVEL_EXCEPTION_FILTER oldFilter; - - /* Install our ExceptionFilter */ - oldFilter = SetUnhandledExceptionFilter( ExceptionFilter ); - - if ( cpu_has_xmm ) { - _mesa_debug(NULL, "Testing OS support for SSE...\n"); - - _mesa_test_os_sse_support(); - - if ( cpu_has_xmm ) { - _mesa_debug(NULL, "Yes.\n"); - } else { - _mesa_debug(NULL, "No!\n"); - } - } - - if ( cpu_has_xmm ) { - _mesa_debug(NULL, "Testing OS support for SSE unmasked exceptions...\n"); - - _mesa_test_os_sse_exception_support(); - - if ( cpu_has_xmm ) { - _mesa_debug(NULL, "Yes.\n"); - } else { - _mesa_debug(NULL, "No!\n"); - } - } - - /* Restore previous exception filter */ - SetUnhandledExceptionFilter( oldFilter ); - - if ( cpu_has_xmm ) { - _mesa_debug(NULL, "Tests of OS support for SSE passed.\n"); - } else { - _mesa_debug(NULL, "Tests of OS support for SSE failed!\n"); - } -#else - /* Do nothing on other platforms for now. - */ - if (detection_debug) - _mesa_debug(NULL, "Not testing OS support for SSE, leaving enabled.\n"); -#endif /* __FreeBSD__ */ -} - -#endif /* USE_SSE_ASM */ - - -/** - * Initialize the _mesa_x86_cpu_features bitfield. - * This is a no-op if called more than once. - */ -void -_mesa_get_x86_features(void) -{ - static int called = 0; - - if (called) - return; - - called = 1; - -#ifdef USE_X86_ASM - _mesa_x86_cpu_features = 0x0; - - if (getenv( "MESA_NO_ASM")) { - return; - } - - if (!_mesa_x86_has_cpuid()) { - _mesa_debug(NULL, "CPUID not detected\n"); - } - else { - GLuint cpu_features, cpu_features_ecx; - GLuint cpu_ext_features; - GLuint cpu_ext_info; - char cpu_vendor[13]; - GLuint result; - - /* get vendor name */ - _mesa_x86_cpuid(0, &result, (GLuint *)(cpu_vendor + 0), (GLuint *)(cpu_vendor + 8), (GLuint *)(cpu_vendor + 4)); - cpu_vendor[12] = '\0'; - - if (detection_debug) - _mesa_debug(NULL, "CPU vendor: %s\n", cpu_vendor); - - /* get cpu features */ - cpu_features = _mesa_x86_cpuid_edx(1); - cpu_features_ecx = _mesa_x86_cpuid_ecx(1); - - if (cpu_features & X86_CPU_FPU) - _mesa_x86_cpu_features |= X86_FEATURE_FPU; - if (cpu_features & X86_CPU_CMOV) - _mesa_x86_cpu_features |= X86_FEATURE_CMOV; - -#ifdef USE_MMX_ASM - if (cpu_features & X86_CPU_MMX) - _mesa_x86_cpu_features |= X86_FEATURE_MMX; -#endif - -#ifdef USE_SSE_ASM - if (cpu_features & X86_CPU_XMM) - _mesa_x86_cpu_features |= X86_FEATURE_XMM; - if (cpu_features & X86_CPU_XMM2) - _mesa_x86_cpu_features |= X86_FEATURE_XMM2; - if (cpu_features_ecx & X86_CPU_SSE4_1) - _mesa_x86_cpu_features |= X86_FEATURE_SSE4_1; -#endif - - /* query extended cpu features */ - if ((cpu_ext_info = _mesa_x86_cpuid_eax(0x80000000)) > 0x80000000) { - if (cpu_ext_info >= 0x80000001) { - - cpu_ext_features = _mesa_x86_cpuid_edx(0x80000001); - - if (cpu_features & X86_CPU_MMX) { - -#ifdef USE_3DNOW_ASM - if (cpu_ext_features & X86_CPUEXT_3DNOW) - _mesa_x86_cpu_features |= X86_FEATURE_3DNOW; - if (cpu_ext_features & X86_CPUEXT_3DNOW_EXT) - _mesa_x86_cpu_features |= X86_FEATURE_3DNOWEXT; -#endif - -#ifdef USE_MMX_ASM - if (cpu_ext_features & X86_CPUEXT_MMX_EXT) - _mesa_x86_cpu_features |= X86_FEATURE_MMXEXT; -#endif - } - } - - /* query cpu name */ - if (cpu_ext_info >= 0x80000002) { - GLuint ofs; - char cpu_name[49]; - for (ofs = 0; ofs < 3; ofs++) - _mesa_x86_cpuid(0x80000002+ofs, (GLuint *)(cpu_name + (16*ofs)+0), (GLuint *)(cpu_name + (16*ofs)+4), (GLuint *)(cpu_name + (16*ofs)+8), (GLuint *)(cpu_name + (16*ofs)+12)); - cpu_name[48] = '\0'; /* the name should be NULL terminated, but just to be sure */ - - if (detection_debug) - _mesa_debug(NULL, "CPU name: %s\n", cpu_name); - } - } - - } - -#ifdef USE_MMX_ASM - if ( cpu_has_mmx ) { - if ( getenv( "MESA_NO_MMX" ) == 0 ) { - if (detection_debug) - _mesa_debug(NULL, "MMX cpu detected.\n"); - } else { - _mesa_x86_cpu_features &= ~(X86_FEATURE_MMX); - } - } -#endif - -#ifdef USE_3DNOW_ASM - if ( cpu_has_3dnow ) { - if ( getenv( "MESA_NO_3DNOW" ) == 0 ) { - if (detection_debug) - _mesa_debug(NULL, "3DNow! cpu detected.\n"); - } else { - _mesa_x86_cpu_features &= ~(X86_FEATURE_3DNOW); - } - } -#endif - -#ifdef USE_SSE_ASM - if ( cpu_has_xmm ) { - if ( getenv( "MESA_NO_SSE" ) == 0 ) { - if (detection_debug) - _mesa_debug(NULL, "SSE cpu detected.\n"); - if ( getenv( "MESA_FORCE_SSE" ) == 0 ) { - _mesa_check_os_sse_support(); - } - } else { - _mesa_debug(NULL, "SSE cpu detected, but switched off by user.\n"); - _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM); - } - } -#endif - -#elif defined(USE_X86_64_ASM) - { - unsigned int eax, ebx, ecx, edx; - - /* Always available on x86-64. */ - _mesa_x86_cpu_features |= X86_FEATURE_XMM | X86_FEATURE_XMM2; - - if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) - return; - - if (ecx & bit_SSE4_1) - _mesa_x86_cpu_features |= X86_FEATURE_SSE4_1; - } -#endif /* USE_X86_64_ASM */ - - (void) detection_debug; -} diff --git a/src/mesa/x86/common_x86_asm.S b/src/mesa/x86/common_x86_asm.S deleted file mode 100644 index 765e324b20e..00000000000 --- a/src/mesa/x86/common_x86_asm.S +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 1999-2004 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * Check extended CPU capabilities. Now justs returns the raw CPUID - * feature information, allowing the higher level code to interpret the - * results. - * - * Written by Holger Waechtler - * - * Cleaned up and simplified by Gareth Hughes - * - */ - -/* - * NOTE: Avoid using spaces in between '(' ')' and arguments, especially - * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces - * in there will break the build on some platforms. - */ - -#include "assyntax.h" -#include "common_x86_features.h" - - SEG_TEXT - -ALIGNTEXT4 -GLOBL GLNAME(_mesa_x86_has_cpuid) -HIDDEN(_mesa_x86_has_cpuid) -GLNAME(_mesa_x86_has_cpuid): - _CET_ENDBR - /* Test for the CPUID command. If the ID Flag bit in EFLAGS - * (bit 21) is writable, the CPUID command is present */ - PUSHF_L - POP_L (EAX) - MOV_L (EAX, ECX) - XOR_L (CONST(0x00200000), EAX) - PUSH_L (EAX) - POPF_L - PUSHF_L - POP_L (EAX) - - /* Verify the ID Flag bit has been written. */ - CMP_L (ECX, EAX) - SETNE (AL) - XOR_L (CONST(0xff), EAX) - - RET - - -ALIGNTEXT4 -GLOBL GLNAME(_mesa_x86_cpuid) -HIDDEN(_mesa_x86_cpuid) -GLNAME(_mesa_x86_cpuid): - _CET_ENDBR - MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */ - PUSH_L (EDI) - PUSH_L (EBX) - - CPUID - - MOV_L (REGOFF(16, ESP), EDI) /* *eax */ - MOV_L (EAX, REGIND(EDI)) - MOV_L (REGOFF(20, ESP), EDI) /* *ebx */ - MOV_L (EBX, REGIND(EDI)) - MOV_L (REGOFF(24, ESP), EDI) /* *ecx */ - MOV_L (ECX, REGIND(EDI)) - MOV_L (REGOFF(28, ESP), EDI) /* *edx */ - MOV_L (EDX, REGIND(EDI)) - - POP_L (EBX) - POP_L (EDI) - RET - -ALIGNTEXT4 -GLOBL GLNAME(_mesa_x86_cpuid_eax) -HIDDEN(_mesa_x86_cpuid_eax) -GLNAME(_mesa_x86_cpuid_eax): - _CET_ENDBR - MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */ - PUSH_L (EBX) - - CPUID - - POP_L (EBX) - RET - -ALIGNTEXT4 -GLOBL GLNAME(_mesa_x86_cpuid_ebx) -HIDDEN(_mesa_x86_cpuid_ebx) -GLNAME(_mesa_x86_cpuid_ebx): - _CET_ENDBR - MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */ - PUSH_L (EBX) - - CPUID - MOV_L (EBX, EAX) /* return EBX */ - - POP_L (EBX) - RET - -ALIGNTEXT4 -GLOBL GLNAME(_mesa_x86_cpuid_ecx) -HIDDEN(_mesa_x86_cpuid_ecx) -GLNAME(_mesa_x86_cpuid_ecx): - _CET_ENDBR - MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */ - PUSH_L (EBX) - - CPUID - MOV_L (ECX, EAX) /* return ECX */ - - POP_L (EBX) - RET - -ALIGNTEXT4 -GLOBL GLNAME(_mesa_x86_cpuid_edx) -HIDDEN(_mesa_x86_cpuid_edx) -GLNAME(_mesa_x86_cpuid_edx): - _CET_ENDBR - MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */ - PUSH_L (EBX) - - CPUID - MOV_L (EDX, EAX) /* return EDX */ - - POP_L (EBX) - RET - -#ifdef USE_SSE_ASM -/* Execute an SSE instruction to see if the operating system correctly - * supports SSE. A signal handler for SIGILL should have been set - * before calling this function, otherwise this could kill the client - * application. - * - * -----> !!!! ATTENTION DEVELOPERS !!!! <----- - * - * If you're debugging with gdb and you get stopped in this function, - * just type 'continue'! Execution will proceed normally. - * See freedesktop.org bug #1709 for more info. - */ -ALIGNTEXT4 -GLOBL GLNAME( _mesa_test_os_sse_support ) -HIDDEN(_mesa_test_os_sse_support) -GLNAME( _mesa_test_os_sse_support ): - _CET_ENDBR - XORPS ( XMM0, XMM0 ) - - RET - - -/* Perform an SSE divide-by-zero to see if the operating system - * correctly supports unmasked SIMD FPU exceptions. Signal handlers for - * SIGILL and SIGFPE should have been set before calling this function, - * otherwise this could kill the client application. - */ -ALIGNTEXT4 -GLOBL GLNAME( _mesa_test_os_sse_exception_support ) -HIDDEN(_mesa_test_os_sse_exception_support) -GLNAME( _mesa_test_os_sse_exception_support ): - _CET_ENDBR - PUSH_L ( EBP ) - MOV_L ( ESP, EBP ) - SUB_L ( CONST( 8 ), ESP ) - - /* Save the original MXCSR register value. - */ - STMXCSR ( REGOFF( -4, EBP ) ) - - /* Unmask the divide-by-zero exception and perform one. - */ - STMXCSR ( REGOFF( -8, EBP ) ) - AND_L ( CONST( 0xfffffdff ), REGOFF( -8, EBP ) ) - LDMXCSR ( REGOFF( -8, EBP ) ) - - XORPS ( XMM0, XMM0 ) - - PUSH_L ( CONST( 0x3f800000 ) ) - PUSH_L ( CONST( 0x3f800000 ) ) - PUSH_L ( CONST( 0x3f800000 ) ) - PUSH_L ( CONST( 0x3f800000 ) ) - - MOVUPS ( REGIND( ESP ), XMM1 ) - - DIVPS ( XMM0, XMM1 ) - - /* Restore the original MXCSR register value. - */ - LDMXCSR ( REGOFF( -4, EBP ) ) - - LEAVE - RET - -#endif - - -#if defined (__ELF__) && defined (__linux__) - .section .note.GNU-stack,"",%progbits -#endif diff --git a/src/mesa/x86/common_x86_asm.h b/src/mesa/x86/common_x86_asm.h deleted file mode 100644 index dae3ca2a60b..00000000000 --- a/src/mesa/x86/common_x86_asm.h +++ /dev/null @@ -1,53 +0,0 @@ - -/* - * Mesa 3-D graphics library - * - * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * Check CPU capabilities & initialize optimized funtions for this particular - * processor. - * - * Written by Holger Waechtler - * Changed by Andre Werthmann for using the - * new SSE functions - * - * Reimplemented by Gareth Hughes in a more - * future-proof manner, based on code in the Linux kernel. - */ - -#ifndef __COMMON_X86_ASM_H__ -#define __COMMON_X86_ASM_H__ - -/* Do not reference mtypes.h from this file. - */ -#include "common_x86_features.h" - -extern int _mesa_x86_cpu_features; - -extern void _mesa_get_x86_features(void); - -extern void _mesa_check_os_sse_support(void); - -extern void _mesa_init_all_x86_transform_asm( void ); - -#endif diff --git a/src/mesa/x86/common_x86_features.h b/src/mesa/x86/common_x86_features.h deleted file mode 100644 index 90105dafc6c..00000000000 --- a/src/mesa/x86/common_x86_features.h +++ /dev/null @@ -1,97 +0,0 @@ - -/* - * Mesa 3-D graphics library - * - * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * x86 CPUID feature information. The raw data is returned by - * _mesa_identify_x86_cpu_features() and interpreted with the cpu_has_* - * helper macros. - * - * Gareth Hughes - */ - -#ifndef __COMMON_X86_FEATURES_H__ -#define __COMMON_X86_FEATURES_H__ - -#define X86_FEATURE_FPU (1<<0) -#define X86_FEATURE_CMOV (1<<1) -#define X86_FEATURE_MMXEXT (1<<2) -#define X86_FEATURE_MMX (1<<3) -#define X86_FEATURE_FXSR (1<<4) -#define X86_FEATURE_XMM (1<<5) -#define X86_FEATURE_XMM2 (1<<6) -#define X86_FEATURE_3DNOWEXT (1<<7) -#define X86_FEATURE_3DNOW (1<<8) -#define X86_FEATURE_SSE4_1 (1<<9) - -/* standard X86 CPU features */ -#define X86_CPU_FPU (1<<0) -#define X86_CPU_CMOV (1<<15) -#define X86_CPU_MMX (1<<23) -#define X86_CPU_XMM (1<<25) -#define X86_CPU_XMM2 (1<<26) -/* ECX. */ -#define X86_CPU_SSE4_1 (1<<19) - -/* extended X86 CPU features */ -#define X86_CPUEXT_MMX_EXT (1<<22) -#define X86_CPUEXT_3DNOW_EXT (1<<30) -#define X86_CPUEXT_3DNOW (1<<31) - -#ifdef __MMX__ -#define cpu_has_mmx 1 -#else -#define cpu_has_mmx (_mesa_x86_cpu_features & X86_FEATURE_MMX) -#endif - -#define cpu_has_mmxext (_mesa_x86_cpu_features & X86_FEATURE_MMXEXT) - -#if defined(__SSE__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1)) || defined(_M_X64) -#define cpu_has_xmm 1 -#else -#define cpu_has_xmm (_mesa_x86_cpu_features & X86_FEATURE_XMM) -#endif - -#if defined(__SSE2__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(_M_X64) -#define cpu_has_xmm2 1 -#else -#define cpu_has_xmm2 (_mesa_x86_cpu_features & X86_FEATURE_XMM2) -#endif - -#ifdef __3dNOW__ -#define cpu_has_3dnow 1 -#else -#define cpu_has_3dnow (_mesa_x86_cpu_features & X86_FEATURE_3DNOW) -#endif - -#define cpu_has_3dnowext (_mesa_x86_cpu_features & X86_FEATURE_3DNOWEXT) - -#ifdef __SSE4_1__ -#define cpu_has_sse4_1 1 -#else -#define cpu_has_sse4_1 (_mesa_x86_cpu_features & X86_FEATURE_SSE4_1) -#endif - -#endif - diff --git a/src/mesa/x86/read_rgba_span_x86.S b/src/mesa/x86/read_rgba_span_x86.S deleted file mode 100644 index 9a68ab83e34..00000000000 --- a/src/mesa/x86/read_rgba_span_x86.S +++ /dev/null @@ -1,687 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2004 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file read_rgba_span_x86.S - * Optimized routines to transfer pixel data from the framebuffer to a - * buffer in main memory. - * - * \author Ian Romanick - */ -/* Control flow enforcement support */ -#ifdef HAVE_CET_H -#include -#else -#define _CET_ENDBR -#endif - - .file "read_rgba_span_x86.S" -#if !defined(__MINGW32__) && !defined(__APPLE__) /* this one cries for assyntax.h */ -/* Kevin F. Quinn 2nd July 2006 - * Replaced data segment constants with text-segment instructions. - */ -#define LOAD_MASK(mvins,m1,m2) \ - pushl $0xff00ff00 ;\ - pushl $0xff00ff00 ;\ - pushl $0xff00ff00 ;\ - pushl $0xff00ff00 ;\ - mvins (%esp), m1 ;\ - pushl $0x00ff0000 ;\ - pushl $0x00ff0000 ;\ - pushl $0x00ff0000 ;\ - pushl $0x00ff0000 ;\ - mvins (%esp), m2 ;\ - addl $32, %esp - -/* I implemented these as macros because they appear in several places, - * and I've tweaked them a number of times. I got tired of changing every - * place they appear. :) - */ - -#define DO_ONE_PIXEL() \ - movl (%ebx), %eax ; \ - addl $4, %ebx ; \ - bswap %eax /* ARGB -> BGRA */ ; \ - rorl $8, %eax /* BGRA -> ABGR */ ; \ - movl %eax, (%ecx) /* ABGR -> R, G, B, A */ ; \ - addl $4, %ecx - -#define DO_ONE_LAST_PIXEL() \ - movl (%ebx), %eax ; \ - bswap %eax /* ARGB -> BGRA */ ; \ - rorl $8, %eax /* BGRA -> ABGR */ ; \ - movl %eax, (%ecx) /* ABGR -> R, G, B, A */ ; - - -/** - * MMX optimized version of the BGRA8888_REV to RGBA copy routine. - * - * \warning - * This function assumes that the caller will issue the EMMS instruction - * at the correct places. - */ - -.globl _generic_read_RGBA_span_BGRA8888_REV_MMX -.hidden _generic_read_RGBA_span_BGRA8888_REV_MMX - .type _generic_read_RGBA_span_BGRA8888_REV_MMX, @function -_generic_read_RGBA_span_BGRA8888_REV_MMX: - _CET_ENDBR - pushl %ebx - -#ifdef USE_INNER_EMMS - emms -#endif - LOAD_MASK(movq,%mm1,%mm2) - - movl 8(%esp), %ebx /* source pointer */ - movl 16(%esp), %edx /* number of pixels to copy */ - movl 12(%esp), %ecx /* destination pointer */ - - testl %edx, %edx - jle .L20 /* Bail if there's nothing to do. */ - - movl %ebx, %eax - - negl %eax - sarl $2, %eax - andl $1, %eax - je .L17 - - subl %eax, %edx - DO_ONE_PIXEL() -.L17: - - /* Would it be faster to unroll this loop once and process 4 pixels - * per pass, instead of just two? - */ - - movl %edx, %eax - shrl %eax - jmp .L18 -.L19: - movq (%ebx), %mm0 - addl $8, %ebx - - /* These 9 instructions do what PSHUFB (if there were such an - * instruction) could do in 1. :( - */ - - movq %mm0, %mm3 - movq %mm0, %mm4 - - pand %mm2, %mm3 - psllq $16, %mm4 - psrlq $16, %mm3 - pand %mm2, %mm4 - - pand %mm1, %mm0 - por %mm4, %mm3 - por %mm3, %mm0 - - movq %mm0, (%ecx) - addl $8, %ecx - subl $1, %eax -.L18: - jne .L19 - -#ifdef USE_INNER_EMMS - emms -#endif - - /* At this point there are either 1 or 0 pixels remaining to be - * converted. Convert the last pixel, if needed. - */ - - testl $1, %edx - je .L20 - - DO_ONE_LAST_PIXEL() - -.L20: - popl %ebx - ret - .size _generic_read_RGBA_span_BGRA8888_REV_MMX, .-_generic_read_RGBA_span_BGRA8888_REV_MMX - - -/** - * SSE optimized version of the BGRA8888_REV to RGBA copy routine. SSE - * instructions are only actually used to read data from the framebuffer. - * In practice, the speed-up is pretty small. - * - * \todo - * Do some more testing and determine if there's any reason to have this - * function in addition to the MMX version. - * - * \warning - * This function assumes that the caller will issue the EMMS instruction - * at the correct places. - */ - -.globl _generic_read_RGBA_span_BGRA8888_REV_SSE -.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE - .type _generic_read_RGBA_span_BGRA8888_REV_SSE, @function -_generic_read_RGBA_span_BGRA8888_REV_SSE: - _CET_ENDBR - pushl %esi - pushl %ebx - pushl %ebp - -#ifdef USE_INNER_EMMS - emms -#endif - - LOAD_MASK(movq,%mm1,%mm2) - - movl 16(%esp), %ebx /* source pointer */ - movl 24(%esp), %edx /* number of pixels to copy */ - movl 20(%esp), %ecx /* destination pointer */ - - testl %edx, %edx - jle .L35 /* Bail if there's nothing to do. */ - - movl %esp, %ebp - subl $16, %esp - andl $0xfffffff0, %esp - - movl %ebx, %eax - movl %edx, %esi - - negl %eax - andl $15, %eax - sarl $2, %eax - cmpl %edx, %eax - cmovle %eax, %esi - - subl %esi, %edx - - testl $1, %esi - je .L32 - - DO_ONE_PIXEL() -.L32: - - testl $2, %esi - je .L31 - - movq (%ebx), %mm0 - addl $8, %ebx - - movq %mm0, %mm3 - movq %mm0, %mm4 - - pand %mm2, %mm3 - psllq $16, %mm4 - psrlq $16, %mm3 - pand %mm2, %mm4 - - pand %mm1, %mm0 - por %mm4, %mm3 - por %mm3, %mm0 - - movq %mm0, (%ecx) - addl $8, %ecx -.L31: - - movl %edx, %eax - shrl $2, %eax - jmp .L33 -.L34: - movaps (%ebx), %xmm0 - addl $16, %ebx - - /* This would be so much better if we could just move directly from - * an SSE register to an MMX register. Unfortunately, that - * functionality wasn't introduced until SSE2 with the MOVDQ2Q - * instruction. - */ - - movaps %xmm0, (%esp) - movq (%esp), %mm0 - movq 8(%esp), %mm5 - - movq %mm0, %mm3 - movq %mm0, %mm4 - movq %mm5, %mm6 - movq %mm5, %mm7 - - pand %mm2, %mm3 - pand %mm2, %mm6 - - psllq $16, %mm4 - psllq $16, %mm7 - - psrlq $16, %mm3 - psrlq $16, %mm6 - - pand %mm2, %mm4 - pand %mm2, %mm7 - - pand %mm1, %mm0 - pand %mm1, %mm5 - - por %mm4, %mm3 - por %mm7, %mm6 - - por %mm3, %mm0 - por %mm6, %mm5 - - movq %mm0, (%ecx) - movq %mm5, 8(%ecx) - addl $16, %ecx - - subl $1, %eax -.L33: - jne .L34 - -#ifdef USE_INNER_EMMS - emms -#endif - movl %ebp, %esp - - /* At this point there are either [0, 3] pixels remaining to be - * converted. - */ - - testl $2, %edx - je .L36 - - movq (%ebx), %mm0 - addl $8, %ebx - - movq %mm0, %mm3 - movq %mm0, %mm4 - - pand %mm2, %mm3 - psllq $16, %mm4 - psrlq $16, %mm3 - pand %mm2, %mm4 - - pand %mm1, %mm0 - por %mm4, %mm3 - por %mm3, %mm0 - - movq %mm0, (%ecx) - addl $8, %ecx -.L36: - - testl $1, %edx - je .L35 - - DO_ONE_LAST_PIXEL() -.L35: - popl %ebp - popl %ebx - popl %esi - ret - .size _generic_read_RGBA_span_BGRA8888_REV_SSE, .-_generic_read_RGBA_span_BGRA8888_REV_SSE - - -/** - * SSE2 optimized version of the BGRA8888_REV to RGBA copy routine. - */ - - .text -.globl _generic_read_RGBA_span_BGRA8888_REV_SSE2 -.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE2 - .type _generic_read_RGBA_span_BGRA8888_REV_SSE2, @function -_generic_read_RGBA_span_BGRA8888_REV_SSE2: - _CET_ENDBR - pushl %esi - pushl %ebx - - LOAD_MASK(movdqu,%xmm1,%xmm2) - - movl 12(%esp), %ebx /* source pointer */ - movl 20(%esp), %edx /* number of pixels to copy */ - movl 16(%esp), %ecx /* destination pointer */ - - movl %ebx, %eax - movl %edx, %esi - - testl %edx, %edx - jle .L46 /* Bail if there's nothing to do. */ - - /* If the source pointer isn't a multiple of 16 we have to process - * a few pixels the "slow" way to get the address aligned for - * the SSE fetch intsructions. - */ - - negl %eax - andl $15, %eax - sarl $2, %eax - - cmpl %edx, %eax - cmovbe %eax, %esi - subl %esi, %edx - - testl $1, %esi - je .L41 - - DO_ONE_PIXEL() -.L41: - testl $2, %esi - je .L40 - - movq (%ebx), %xmm0 - addl $8, %ebx - - movdqa %xmm0, %xmm3 - movdqa %xmm0, %xmm4 - andps %xmm1, %xmm0 - - andps %xmm2, %xmm3 - pslldq $2, %xmm4 - psrldq $2, %xmm3 - andps %xmm2, %xmm4 - - orps %xmm4, %xmm3 - orps %xmm3, %xmm0 - - movq %xmm0, (%ecx) - addl $8, %ecx -.L40: - - /* Would it be worth having a specialized version of this loop for - * the case where the destination is 16-byte aligned? That version - * would be identical except that it could use movedqa instead of - * movdqu. - */ - - movl %edx, %eax - shrl $2, %eax - jmp .L42 -.L43: - movdqa (%ebx), %xmm0 - addl $16, %ebx - - movdqa %xmm0, %xmm3 - movdqa %xmm0, %xmm4 - andps %xmm1, %xmm0 - - andps %xmm2, %xmm3 - pslldq $2, %xmm4 - psrldq $2, %xmm3 - andps %xmm2, %xmm4 - - orps %xmm4, %xmm3 - orps %xmm3, %xmm0 - - movdqu %xmm0, (%ecx) - addl $16, %ecx - subl $1, %eax -.L42: - jne .L43 - - - /* There may be upto 3 pixels remaining to be copied. Take care - * of them now. We do the 2 pixel case first because the data - * will be aligned. - */ - - testl $2, %edx - je .L47 - - movq (%ebx), %xmm0 - addl $8, %ebx - - movdqa %xmm0, %xmm3 - movdqa %xmm0, %xmm4 - andps %xmm1, %xmm0 - - andps %xmm2, %xmm3 - pslldq $2, %xmm4 - psrldq $2, %xmm3 - andps %xmm2, %xmm4 - - orps %xmm4, %xmm3 - orps %xmm3, %xmm0 - - movq %xmm0, (%ecx) - addl $8, %ecx -.L47: - - testl $1, %edx - je .L46 - - DO_ONE_LAST_PIXEL() -.L46: - - popl %ebx - popl %esi - ret - .size _generic_read_RGBA_span_BGRA8888_REV_SSE2, .-_generic_read_RGBA_span_BGRA8888_REV_SSE2 - - - -#define MASK_565_L 0x07e0f800 -#define MASK_565_H 0x0000001f -/* Setting SCALE_ADJUST to 5 gives a perfect match with the - * classic C implementation in Mesa. Setting SCALE_ADJUST - * to 0 is slightly faster but at a small cost to accuracy. - */ -#define SCALE_ADJUST 5 -#if SCALE_ADJUST == 5 -#define PRESCALE_L 0x00100001 -#define PRESCALE_H 0x00000200 -#define SCALE_L 0x40C620E8 -#define SCALE_H 0x0000839d -#elif SCALE_ADJUST == 0 -#define PRESCALE_L 0x00200001 -#define PRESCALE_H 0x00000800 -#define SCALE_L 0x01040108 -#define SCALE_H 0x00000108 -#else -#error SCALE_ADJUST must either be 5 or 0. -#endif -#define ALPHA_L 0x00000000 -#define ALPHA_H 0x00ff0000 - -/** - * MMX optimized version of the RGB565 to RGBA copy routine. - */ - - .text - .globl _generic_read_RGBA_span_RGB565_MMX - .hidden _generic_read_RGBA_span_RGB565_MMX - .type _generic_read_RGBA_span_RGB565_MMX, @function - -_generic_read_RGBA_span_RGB565_MMX: - _CET_ENDBR -#ifdef USE_INNER_EMMS - emms -#endif - - movl 4(%esp), %eax /* source pointer */ - movl 8(%esp), %edx /* destination pointer */ - movl 12(%esp), %ecx /* number of pixels to copy */ - - pushl $MASK_565_H - pushl $MASK_565_L - movq (%esp), %mm5 - pushl $PRESCALE_H - pushl $PRESCALE_L - movq (%esp), %mm6 - pushl $SCALE_H - pushl $SCALE_L - movq (%esp), %mm7 - pushl $ALPHA_H - pushl $ALPHA_L - movq (%esp), %mm3 - addl $32,%esp - - sarl $2, %ecx - jl .L01 /* Bail early if the count is negative. */ - jmp .L02 - -.L03: - /* Fetch 4 RGB565 pixels into %mm4. Distribute the first and - * second pixels into the four words of %mm0 and %mm2. - */ - - movq (%eax), %mm4 - addl $8, %eax - - pshufw $0x00, %mm4, %mm0 - pshufw $0x55, %mm4, %mm2 - - - /* Mask the pixels so that each word of each register contains only - * one color component. - */ - - pand %mm5, %mm0 - pand %mm5, %mm2 - - - /* Adjust the component values so that they are as small as possible, - * but large enough so that we can multiply them by an unsigned 16-bit - * number and get a value as large as 0x00ff0000. - */ - - pmullw %mm6, %mm0 - pmullw %mm6, %mm2 -#if SCALE_ADJUST > 0 - psrlw $SCALE_ADJUST, %mm0 - psrlw $SCALE_ADJUST, %mm2 -#endif - - /* Scale the input component values to be on the range - * [0, 0x00ff0000]. This it the real magic of the whole routine. - */ - - pmulhuw %mm7, %mm0 - pmulhuw %mm7, %mm2 - - - /* Always set the alpha value to 0xff. - */ - - por %mm3, %mm0 - por %mm3, %mm2 - - - /* Pack the 16-bit values to 8-bit values and store the converted - * pixel data. - */ - - packuswb %mm2, %mm0 - movq %mm0, (%edx) - addl $8, %edx - - pshufw $0xaa, %mm4, %mm0 - pshufw $0xff, %mm4, %mm2 - - pand %mm5, %mm0 - pand %mm5, %mm2 - pmullw %mm6, %mm0 - pmullw %mm6, %mm2 -#if SCALE_ADJUST > 0 - psrlw $SCALE_ADJUST, %mm0 - psrlw $SCALE_ADJUST, %mm2 -#endif - pmulhuw %mm7, %mm0 - pmulhuw %mm7, %mm2 - - por %mm3, %mm0 - por %mm3, %mm2 - - packuswb %mm2, %mm0 - - movq %mm0, (%edx) - addl $8, %edx - - subl $1, %ecx -.L02: - jne .L03 - - - /* At this point there can be at most 3 pixels left to process. If - * there is either 2 or 3 left, process 2. - */ - - movl 12(%esp), %ecx - testl $0x02, %ecx - je .L04 - - movd (%eax), %mm4 - addl $4, %eax - - pshufw $0x00, %mm4, %mm0 - pshufw $0x55, %mm4, %mm2 - - pand %mm5, %mm0 - pand %mm5, %mm2 - pmullw %mm6, %mm0 - pmullw %mm6, %mm2 -#if SCALE_ADJUST > 0 - psrlw $SCALE_ADJUST, %mm0 - psrlw $SCALE_ADJUST, %mm2 -#endif - pmulhuw %mm7, %mm0 - pmulhuw %mm7, %mm2 - - por %mm3, %mm0 - por %mm3, %mm2 - - packuswb %mm2, %mm0 - - movq %mm0, (%edx) - addl $8, %edx - -.L04: - /* At this point there can be at most 1 pixel left to process. - * Process it if needed. - */ - - testl $0x01, %ecx - je .L01 - - movzwl (%eax), %ecx - movd %ecx, %mm4 - - pshufw $0x00, %mm4, %mm0 - - pand %mm5, %mm0 - pmullw %mm6, %mm0 -#if SCALE_ADJUST > 0 - psrlw $SCALE_ADJUST, %mm0 -#endif - pmulhuw %mm7, %mm0 - - por %mm3, %mm0 - - packuswb %mm0, %mm0 - - movd %mm0, (%edx) - -.L01: -#ifdef USE_INNER_EMMS - emms -#endif - ret -#endif /* !defined(__MINGW32__) && !defined(__APPLE__) */ - -#if defined (__ELF__) && defined (__linux__) - .section .note.GNU-stack,"",%progbits -#endif