util: cleanup cpuinfo.* and its related files

_mesa_get_cpu_features is no longer needed, as all of its usages have been
replaced with util_get_cpu_caps in u_cpu_detect.h

Signed-off-by: Yonggang Luo <luoyonggang@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19564>
This commit is contained in:
Yonggang Luo 2022-11-07 11:00:21 +08:00 committed by Marge Bot
parent 7436669d55
commit ed4fd1d90e
10 changed files with 0 additions and 1564 deletions

View file

@ -87,7 +87,6 @@
#include "bufferobj.h"
#include "conservativeraster.h"
#include "context.h"
#include "cpuinfo.h"
#include "debug.h"
#include "debug_output.h"
#include "depth.h"
@ -217,7 +216,6 @@ one_time_init(const char *extensions_override)
_mesa_one_time_init_extension_overrides(extensions_override);
_mesa_get_cpu_features();
for (i = 0; i < 256; i++) {
_mesa_ubyte_to_float_color_tab[i] = (float) i / 255.0F;

View file

@ -1,94 +0,0 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "main/cpuinfo.h"
/**
 * Run CPU feature detection.  Must be called once before any of the
 * "cpu_has_foo" macros are consulted.
 */
void
_mesa_get_cpu_features(void)
{
#if defined(USE_X86_ASM) || defined(USE_X86_64_ASM)
   _mesa_get_x86_features();
#endif
}
/**
 * Build a human-readable description of the CPU architecture and the
 * extensions (MMX, 3DNow!, SSE, ...) Mesa is using.
 *
 * \return heap-allocated string; the caller must release it with free(),
 *         or NULL on allocation failure.
 */
char *
_mesa_get_cpu_string(void)
{
#define MAX_STRING 50
   char *info = malloc(MAX_STRING);

   if (!info)
      return NULL;

   info[0] = 0;

#ifdef USE_X86_ASM
   if (_mesa_x86_cpu_features)
      strcat(info, "x86");

   /* A '+' suffix marks the extended variant of each instruction set. */
# ifdef USE_MMX_ASM
   if (cpu_has_mmx) {
      if (cpu_has_mmxext)
         strcat(info, "/MMX+");
      else
         strcat(info, "/MMX");
   }
# endif
# ifdef USE_3DNOW_ASM
   if (cpu_has_3dnow) {
      if (cpu_has_3dnowext)
         strcat(info, "/3DNow!+");
      else
         strcat(info, "/3DNow!");
   }
# endif
# ifdef USE_SSE_ASM
   if (cpu_has_xmm) {
      if (cpu_has_xmm2)
         strcat(info, "/SSE2");
      else
         strcat(info, "/SSE");
   }
# endif
#elif defined(USE_SPARC_ASM)
   strcat(info, "SPARC");
#endif

   /* Worst case ("x86/MMX+/3DNow!+/SSE2") is well under MAX_STRING. */
   assert(strlen(info) < MAX_STRING);

   return info;
}

View file

@ -1,43 +0,0 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CPUINFO_H
#define CPUINFO_H
#if defined USE_X86_ASM || defined USE_X86_64_ASM
#include "x86/common_x86_asm.h"
#endif
extern void
_mesa_get_cpu_features(void);
extern char *
_mesa_get_cpu_string(void);
#endif /* CPUINFO_H */

View file

@ -39,7 +39,6 @@
#include <mesa/main/conservativeraster.h>
#include <mesa/main/consts_exts.h>
#include <mesa/main/context.h>
#include <mesa/main/cpuinfo.h>
#include <mesa/main/dd.h>
#include <mesa/main/debug.h>
#include <mesa/main/debug_output.h>

View file

@ -58,8 +58,6 @@ files_libmesa = files(
'main/context.c',
'main/context.h',
'main/copyimage.c',
'main/cpuinfo.c',
'main/cpuinfo.h',
'main/dd.h',
'main/debug.c',
'main/debug.h',
@ -389,19 +387,13 @@ files_libmesa = files(
'vbo/vbo_save_draw.c',
'vbo/vbo_save_loopback.c',
'vbo/vbo_util.h',
'x86/common_x86.c',
)
inc_libmesa_asm = []
if with_asm_arch == 'x86'
files_libmesa += files(
'x86/assyntax.h',
'x86/common_x86_asm.S',
'x86/common_x86_asm.h',
'x86/common_x86_features.h',
'x86/read_rgba_span_x86.S',
)
inc_libmesa_asm = include_directories('x86')
endif
format_fallback_c = custom_target(

View file

@ -1,360 +0,0 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* \file common_x86.c
*
 * Check CPU capabilities & initialize optimized functions for this particular
* processor.
*
* Changed by Andre Werthmann for using the new SSE functions.
*
* \author Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
* \author Andre Werthmann <wertmann@cs.uni-potsdam.de>
*/
#if defined(USE_SSE_ASM) && defined(__FreeBSD__)
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
#if defined(USE_SSE_ASM) && (defined(__OpenBSD__) || defined(__NetBSD__))
#include <sys/param.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
#endif
#if defined(USE_X86_64_ASM)
#include <cpuid.h>
#if !defined(bit_SSE4_1) && defined(bit_SSE41)
/* XXX: clang defines bit_SSE41 instead of bit_SSE4_1 */
#define bit_SSE4_1 bit_SSE41
#elif !defined(bit_SSE4_1) && !defined(bit_SSE41)
#define bit_SSE4_1 0x00080000
#endif
#endif
#include <stdlib.h>
#include "main/errors.h"
#include "common_x86_asm.h"
/** Bitmask of X86_FEATURE_x bits */
int _mesa_x86_cpu_features = 0x0;

/* When GL_TRUE, _mesa_get_x86_features() logs what it detects. */
static int detection_debug = GL_FALSE;

/* No reason for this to be public.
 */
/* These are implemented in common_x86_asm.S. */
extern GLuint _mesa_x86_has_cpuid(void);
extern void _mesa_x86_cpuid(GLuint op, GLuint *reg_eax, GLuint *reg_ebx, GLuint *reg_ecx, GLuint *reg_edx);
extern GLuint _mesa_x86_cpuid_eax(GLuint op);
extern GLuint _mesa_x86_cpuid_ebx(GLuint op);
extern GLuint _mesa_x86_cpuid_ecx(GLuint op);
extern GLuint _mesa_x86_cpuid_edx(GLuint op);

#if defined(USE_SSE_ASM)
/*
 * We must verify that the Streaming SIMD Extensions are truly supported
 * on this processor before we go ahead and hook out the optimized code.
 *
 * However, I have been told by Alan Cox that all 2.4 (and later) Linux
 * kernels provide full SSE support on all processors that expose SSE via
 * the CPUID mechanism.
 */

/* These are assembly functions: */
extern void _mesa_test_os_sse_support( void );
extern void _mesa_test_os_sse_exception_support( void );
#if defined(_WIN32)
#ifndef STATUS_FLOAT_MULTIPLE_TRAPS
# define STATUS_FLOAT_MULTIPLE_TRAPS (0xC00002B5L)
#endif

/**
 * SEH filter installed while probing SSE support.
 *
 * An illegal-instruction fault means the OS does not save/restore SSE
 * state, so the XMM feature bit is cleared.  A SIMD FP trap is the
 * expected result of the unmasked divide-by-zero test; execution resumes
 * past the faulting instruction (Eip += 3 — presumably the 3-byte DIVPS
 * encoding; confirm against _mesa_test_os_sse_exception_support).
 */
static LONG WINAPI ExceptionFilter(LPEXCEPTION_POINTERS exp)
{
   PEXCEPTION_RECORD rec = exp->ExceptionRecord;
   PCONTEXT ctx = exp->ContextRecord;

   if ( rec->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION ) {
      _mesa_debug(NULL, "EXCEPTION_ILLEGAL_INSTRUCTION\n" );
      _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
   } else if ( rec->ExceptionCode == STATUS_FLOAT_MULTIPLE_TRAPS ) {
      _mesa_debug(NULL, "STATUS_FLOAT_MULTIPLE_TRAPS\n");
      /* Windows seems to clear the exception flag itself, we just have to increment Eip */
   } else {
      /* Bug fix: the %08x conversion previously had no matching argument
       * (undefined behavior) — supply the exception code. */
      _mesa_debug(NULL, "UNEXPECTED EXCEPTION (0x%08x), terminating!\n",
                  (unsigned) rec->ExceptionCode );
      return EXCEPTION_EXECUTE_HANDLER;
   }

   if ( (ctx->ContextFlags & CONTEXT_CONTROL) != CONTEXT_CONTROL ) {
      _mesa_debug(NULL, "Context does not contain control registers, terminating!\n");
      return EXCEPTION_EXECUTE_HANDLER;
   }
   ctx->Eip += 3;

   return EXCEPTION_CONTINUE_EXECUTION;
}
#endif /* _WIN32 */
/**
 * Check if SSE is supported.
 * If not, turn off the X86_FEATURE_XMM flag in _mesa_x86_cpu_features.
 *
 * On the BSDs the kernel advertises SSE state save/restore via sysctl;
 * on Windows an actual SSE instruction is executed under an exception
 * filter.  Other platforms leave the CPUID-detected flag untouched.
 */
void _mesa_check_os_sse_support( void )
{
#if defined(__FreeBSD__)
   {
      int ret, enabled;
      unsigned int len;
      len = sizeof(enabled);
      ret = sysctlbyname("hw.instruction_sse", &enabled, &len, NULL, 0);
      if (ret || !enabled)
         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
   }
#elif defined (__NetBSD__)
   {
      int ret, enabled;
      size_t len = sizeof(enabled);
      ret = sysctlbyname("machdep.sse", &enabled, &len, (void *)NULL, 0);
      if (ret || !enabled)
         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
   }
#elif defined(__OpenBSD__)
   {
      int mib[2];
      int ret, enabled;
      size_t len = sizeof(enabled);
      mib[0] = CTL_MACHDEP;
      mib[1] = CPU_SSE;
      ret = sysctl(mib, 2, &enabled, &len, NULL, 0);
      if (ret || !enabled)
         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
   }
#elif defined(_WIN32)
   LPTOP_LEVEL_EXCEPTION_FILTER oldFilter;

   /* Install our ExceptionFilter */
   oldFilter = SetUnhandledExceptionFilter( ExceptionFilter );

   /* Execute an SSE instruction; the filter clears X86_FEATURE_XMM if it
    * raises EXCEPTION_ILLEGAL_INSTRUCTION. */
   if ( cpu_has_xmm ) {
      _mesa_debug(NULL, "Testing OS support for SSE...\n");
      _mesa_test_os_sse_support();
      if ( cpu_has_xmm ) {
         _mesa_debug(NULL, "Yes.\n");
      } else {
         _mesa_debug(NULL, "No!\n");
      }
   }

   /* Trigger an unmasked SIMD divide-by-zero to check exception support. */
   if ( cpu_has_xmm ) {
      _mesa_debug(NULL, "Testing OS support for SSE unmasked exceptions...\n");
      _mesa_test_os_sse_exception_support();
      if ( cpu_has_xmm ) {
         _mesa_debug(NULL, "Yes.\n");
      } else {
         _mesa_debug(NULL, "No!\n");
      }
   }

   /* Restore previous exception filter */
   SetUnhandledExceptionFilter( oldFilter );

   if ( cpu_has_xmm ) {
      _mesa_debug(NULL, "Tests of OS support for SSE passed.\n");
   } else {
      _mesa_debug(NULL, "Tests of OS support for SSE failed!\n");
   }
#else
   /* Do nothing on other platforms for now.
    */
   if (detection_debug)
      _mesa_debug(NULL, "Not testing OS support for SSE, leaving enabled.\n");
#endif /* __FreeBSD__ */
}
#endif /* USE_SSE_ASM */
/**
 * Initialize the _mesa_x86_cpu_features bitfield.
 * This is a no-op if called more than once.
 *
 * Detection is done with the CPUID instruction (via asm helpers on
 * 32-bit x86, via GCC's __get_cpuid() on x86-64), then filtered through
 * the MESA_NO_ASM / MESA_NO_MMX / MESA_NO_3DNOW / MESA_NO_SSE
 * environment variables.
 */
void
_mesa_get_x86_features(void)
{
   /* One-shot guard; subsequent calls return immediately. */
   static int called = 0;

   if (called)
      return;

   called = 1;

#ifdef USE_X86_ASM
   _mesa_x86_cpu_features = 0x0;

   /* MESA_NO_ASM leaves the feature mask empty: all asm paths disabled. */
   if (getenv( "MESA_NO_ASM")) {
      return;
   }

   if (!_mesa_x86_has_cpuid()) {
      _mesa_debug(NULL, "CPUID not detected\n");
   }
   else {
      GLuint cpu_features, cpu_features_ecx;
      GLuint cpu_ext_features;
      GLuint cpu_ext_info;
      char cpu_vendor[13];
      GLuint result;

      /* get vendor name */
      /* Leaf 0 returns the vendor string in EBX, EDX, ECX — hence the
       * +0 / +8 / +4 destination offsets below. */
      _mesa_x86_cpuid(0, &result, (GLuint *)(cpu_vendor + 0), (GLuint *)(cpu_vendor + 8), (GLuint *)(cpu_vendor + 4));
      cpu_vendor[12] = '\0';

      if (detection_debug)
         _mesa_debug(NULL, "CPU vendor: %s\n", cpu_vendor);

      /* get cpu features */
      cpu_features = _mesa_x86_cpuid_edx(1);
      cpu_features_ecx = _mesa_x86_cpuid_ecx(1);

      /* Translate raw CPUID bits into Mesa's X86_FEATURE_* bits. */
      if (cpu_features & X86_CPU_FPU)
         _mesa_x86_cpu_features |= X86_FEATURE_FPU;
      if (cpu_features & X86_CPU_CMOV)
         _mesa_x86_cpu_features |= X86_FEATURE_CMOV;

#ifdef USE_MMX_ASM
      if (cpu_features & X86_CPU_MMX)
         _mesa_x86_cpu_features |= X86_FEATURE_MMX;
#endif

#ifdef USE_SSE_ASM
      if (cpu_features & X86_CPU_XMM)
         _mesa_x86_cpu_features |= X86_FEATURE_XMM;
      if (cpu_features & X86_CPU_XMM2)
         _mesa_x86_cpu_features |= X86_FEATURE_XMM2;
      if (cpu_features_ecx & X86_CPU_SSE4_1)
         _mesa_x86_cpu_features |= X86_FEATURE_SSE4_1;
#endif

      /* query extended cpu features */
      if ((cpu_ext_info = _mesa_x86_cpuid_eax(0x80000000)) > 0x80000000) {
         if (cpu_ext_info >= 0x80000001) {

            cpu_ext_features = _mesa_x86_cpuid_edx(0x80000001);

            /* 3DNow!/extended-MMX only matter when base MMX is present. */
            if (cpu_features & X86_CPU_MMX) {
#ifdef USE_3DNOW_ASM
               if (cpu_ext_features & X86_CPUEXT_3DNOW)
                  _mesa_x86_cpu_features |= X86_FEATURE_3DNOW;
               if (cpu_ext_features & X86_CPUEXT_3DNOW_EXT)
                  _mesa_x86_cpu_features |= X86_FEATURE_3DNOWEXT;
#endif
#ifdef USE_MMX_ASM
               if (cpu_ext_features & X86_CPUEXT_MMX_EXT)
                  _mesa_x86_cpu_features |= X86_FEATURE_MMXEXT;
#endif
            }
         }

         /* query cpu name */
         if (cpu_ext_info >= 0x80000002) {
            GLuint ofs;
            char cpu_name[49];
            /* Brand string spans leaves 0x80000002..4, 16 bytes each. */
            for (ofs = 0; ofs < 3; ofs++)
               _mesa_x86_cpuid(0x80000002+ofs, (GLuint *)(cpu_name + (16*ofs)+0), (GLuint *)(cpu_name + (16*ofs)+4), (GLuint *)(cpu_name + (16*ofs)+8), (GLuint *)(cpu_name + (16*ofs)+12));
            cpu_name[48] = '\0'; /* the name should be NULL terminated, but just to be sure */

            if (detection_debug)
               _mesa_debug(NULL, "CPU name: %s\n", cpu_name);
         }
      }
   }

   /* Environment overrides: each MESA_NO_* variable strips a feature. */
#ifdef USE_MMX_ASM
   if ( cpu_has_mmx ) {
      if ( getenv( "MESA_NO_MMX" ) == 0 ) {
         if (detection_debug)
            _mesa_debug(NULL, "MMX cpu detected.\n");
      } else {
         _mesa_x86_cpu_features &= ~(X86_FEATURE_MMX);
      }
   }
#endif

#ifdef USE_3DNOW_ASM
   if ( cpu_has_3dnow ) {
      if ( getenv( "MESA_NO_3DNOW" ) == 0 ) {
         if (detection_debug)
            _mesa_debug(NULL, "3DNow! cpu detected.\n");
      } else {
         _mesa_x86_cpu_features &= ~(X86_FEATURE_3DNOW);
      }
   }
#endif

#ifdef USE_SSE_ASM
   if ( cpu_has_xmm ) {
      if ( getenv( "MESA_NO_SSE" ) == 0 ) {
         if (detection_debug)
            _mesa_debug(NULL, "SSE cpu detected.\n");
         /* MESA_FORCE_SSE skips the OS-level SSE support check. */
         if ( getenv( "MESA_FORCE_SSE" ) == 0 ) {
            _mesa_check_os_sse_support();
         }
      } else {
         _mesa_debug(NULL, "SSE cpu detected, but switched off by user.\n");
         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
      }
   }
#endif

#elif defined(USE_X86_64_ASM)
   {
      unsigned int eax, ebx, ecx, edx;

      /* Always available on x86-64. */
      _mesa_x86_cpu_features |= X86_FEATURE_XMM | X86_FEATURE_XMM2;

      if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
         return;

      if (ecx & bit_SSE4_1)
         _mesa_x86_cpu_features |= X86_FEATURE_SSE4_1;
   }
#endif /* USE_X86_64_ASM */
   /* Silence unused-variable warning in the x86-64/neither build. */
   (void) detection_debug;
}

View file

@ -1,219 +0,0 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/*
 * Check extended CPU capabilities.  Now just returns the raw CPUID
* feature information, allowing the higher level code to interpret the
* results.
*
* Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
*
* Cleaned up and simplified by Gareth Hughes <gareth@valinux.com>
*
*/
/*
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
* in there will break the build on some platforms.
*/
#include "assyntax.h"
#include "common_x86_features.h"
	SEG_TEXT

/* GLuint _mesa_x86_has_cpuid(void)
 *
 * Returns non-zero when the CPUID instruction is available.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_x86_has_cpuid)
HIDDEN(_mesa_x86_has_cpuid)
GLNAME(_mesa_x86_has_cpuid):
	_CET_ENDBR

	/* Test for the CPUID command.  If the ID Flag bit in EFLAGS
	 * (bit 21) is writable, the CPUID command is present */
	PUSHF_L
	POP_L (EAX)
	MOV_L (EAX, ECX)
	XOR_L (CONST(0x00200000), EAX)
	PUSH_L (EAX)
	POPF_L
	PUSHF_L
	POP_L (EAX)

	/* Verify the ID Flag bit has been written. */
	CMP_L (ECX, EAX)
	SETNE (AL)
	XOR_L (CONST(0xff), EAX)
	RET
/* void _mesa_x86_cpuid(GLuint op, GLuint *eax, GLuint *ebx, GLuint *ecx, GLuint *edx)
 *
 * Execute CPUID leaf 'op' and store all four result registers through
 * the caller-supplied pointers.  EBX/EDI are callee-saved and restored.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_x86_cpuid)
HIDDEN(_mesa_x86_cpuid)
GLNAME(_mesa_x86_cpuid):
	_CET_ENDBR
	MOV_L (REGOFF(4, ESP), EAX)	/* cpuid op */
	PUSH_L (EDI)
	PUSH_L (EBX)

	CPUID

	/* Argument offsets account for the two pushes above. */
	MOV_L (REGOFF(16, ESP), EDI)	/* *eax */
	MOV_L (EAX, REGIND(EDI))
	MOV_L (REGOFF(20, ESP), EDI)	/* *ebx */
	MOV_L (EBX, REGIND(EDI))
	MOV_L (REGOFF(24, ESP), EDI)	/* *ecx */
	MOV_L (ECX, REGIND(EDI))
	MOV_L (REGOFF(28, ESP), EDI)	/* *edx */
	MOV_L (EDX, REGIND(EDI))

	POP_L (EBX)
	POP_L (EDI)
	RET

/* GLuint _mesa_x86_cpuid_eax(GLuint op) — return only EAX of leaf 'op'. */
ALIGNTEXT4
GLOBL GLNAME(_mesa_x86_cpuid_eax)
HIDDEN(_mesa_x86_cpuid_eax)
GLNAME(_mesa_x86_cpuid_eax):
	_CET_ENDBR
	MOV_L (REGOFF(4, ESP), EAX)	/* cpuid op */
	PUSH_L (EBX)

	CPUID

	POP_L (EBX)
	RET

/* GLuint _mesa_x86_cpuid_ebx(GLuint op) — return only EBX of leaf 'op'. */
ALIGNTEXT4
GLOBL GLNAME(_mesa_x86_cpuid_ebx)
HIDDEN(_mesa_x86_cpuid_ebx)
GLNAME(_mesa_x86_cpuid_ebx):
	_CET_ENDBR
	MOV_L (REGOFF(4, ESP), EAX)	/* cpuid op */
	PUSH_L (EBX)

	CPUID

	MOV_L (EBX, EAX)		/* return EBX */
	POP_L (EBX)
	RET

/* GLuint _mesa_x86_cpuid_ecx(GLuint op) — return only ECX of leaf 'op'. */
ALIGNTEXT4
GLOBL GLNAME(_mesa_x86_cpuid_ecx)
HIDDEN(_mesa_x86_cpuid_ecx)
GLNAME(_mesa_x86_cpuid_ecx):
	_CET_ENDBR
	MOV_L (REGOFF(4, ESP), EAX)	/* cpuid op */
	PUSH_L (EBX)

	CPUID

	MOV_L (ECX, EAX)		/* return ECX */
	POP_L (EBX)
	RET

/* GLuint _mesa_x86_cpuid_edx(GLuint op) — return only EDX of leaf 'op'. */
ALIGNTEXT4
GLOBL GLNAME(_mesa_x86_cpuid_edx)
HIDDEN(_mesa_x86_cpuid_edx)
GLNAME(_mesa_x86_cpuid_edx):
	_CET_ENDBR
	MOV_L (REGOFF(4, ESP), EAX)	/* cpuid op */
	PUSH_L (EBX)

	CPUID

	MOV_L (EDX, EAX)		/* return EDX */
	POP_L (EBX)
	RET
#ifdef USE_SSE_ASM
/* Execute an SSE instruction to see if the operating system correctly
 * supports SSE.  A signal handler for SIGILL should have been set
 * before calling this function, otherwise this could kill the client
 * application.
 *
 *   -----> !!!! ATTENTION DEVELOPERS !!!! <-----
 *
 * If you're debugging with gdb and you get stopped in this function,
 * just type 'continue'!  Execution will proceed normally.
 * See freedesktop.org bug #1709 for more info.
 */
ALIGNTEXT4
GLOBL GLNAME( _mesa_test_os_sse_support )
HIDDEN(_mesa_test_os_sse_support)
GLNAME( _mesa_test_os_sse_support ):
	_CET_ENDBR
	XORPS ( XMM0, XMM0 )
	RET

/* Perform an SSE divide-by-zero to see if the operating system
 * correctly supports unmasked SIMD FPU exceptions.  Signal handlers for
 * SIGILL and SIGFPE should have been set before calling this function,
 * otherwise this could kill the client application.
 */
ALIGNTEXT4
GLOBL GLNAME( _mesa_test_os_sse_exception_support )
HIDDEN(_mesa_test_os_sse_exception_support)
GLNAME( _mesa_test_os_sse_exception_support ):
	_CET_ENDBR
	PUSH_L ( EBP )
	MOV_L ( ESP, EBP )
	SUB_L ( CONST( 8 ), ESP )

	/* Save the original MXCSR register value.
	 */
	STMXCSR ( REGOFF( -4, EBP ) )

	/* Unmask the divide-by-zero exception and perform one.
	 */
	STMXCSR ( REGOFF( -8, EBP ) )
	AND_L ( CONST( 0xfffffdff ), REGOFF( -8, EBP ) )
	LDMXCSR ( REGOFF( -8, EBP ) )

	XORPS ( XMM0, XMM0 )

	/* Build a vector of 1.0f on the stack as the dividend. */
	PUSH_L ( CONST( 0x3f800000 ) )
	PUSH_L ( CONST( 0x3f800000 ) )
	PUSH_L ( CONST( 0x3f800000 ) )
	PUSH_L ( CONST( 0x3f800000 ) )
	MOVUPS ( REGIND( ESP ), XMM1 )

	DIVPS ( XMM0, XMM1 )

	/* Restore the original MXCSR register value.
	 */
	LDMXCSR ( REGOFF( -4, EBP ) )

	/* LEAVE restores ESP from EBP, discarding the pushed constants. */
	LEAVE
	RET
#endif

#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif

View file

@ -1,53 +0,0 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/*
 * Check CPU capabilities & initialize optimized functions for this particular
* processor.
*
* Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
* Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the
* new SSE functions
*
* Reimplemented by Gareth Hughes in a more
* future-proof manner, based on code in the Linux kernel.
*/
#ifndef __COMMON_X86_ASM_H__
#define __COMMON_X86_ASM_H__

/* Do not reference mtypes.h from this file.
 */
#include "common_x86_features.h"

/* Bitmask of X86_FEATURE_* bits, filled by _mesa_get_x86_features(). */
extern int _mesa_x86_cpu_features;

/* Populate _mesa_x86_cpu_features; no-op after the first call. */
extern void _mesa_get_x86_features(void);

/* Clear X86_FEATURE_XMM when the OS does not support SSE state. */
extern void _mesa_check_os_sse_support(void);

extern void _mesa_init_all_x86_transform_asm( void );

#endif

View file

@ -1,97 +0,0 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* x86 CPUID feature information. The raw data is returned by
* _mesa_identify_x86_cpu_features() and interpreted with the cpu_has_*
* helper macros.
*
* Gareth Hughes
*/
#ifndef __COMMON_X86_FEATURES_H__
#define __COMMON_X86_FEATURES_H__

/* Mesa-internal feature bits stored in _mesa_x86_cpu_features. */
#define X86_FEATURE_FPU (1<<0)
#define X86_FEATURE_CMOV (1<<1)
#define X86_FEATURE_MMXEXT (1<<2)
#define X86_FEATURE_MMX (1<<3)
#define X86_FEATURE_FXSR (1<<4)
#define X86_FEATURE_XMM (1<<5)
#define X86_FEATURE_XMM2 (1<<6)
#define X86_FEATURE_3DNOWEXT (1<<7)
#define X86_FEATURE_3DNOW (1<<8)
#define X86_FEATURE_SSE4_1 (1<<9)

/* standard X86 CPU features */
/* Raw CPUID leaf 1 EDX bit positions. */
#define X86_CPU_FPU (1<<0)
#define X86_CPU_CMOV (1<<15)
#define X86_CPU_MMX (1<<23)
#define X86_CPU_XMM (1<<25)
#define X86_CPU_XMM2 (1<<26)

/* ECX. */
#define X86_CPU_SSE4_1 (1<<19)

/* extended X86 CPU features */
/* Raw CPUID leaf 0x80000001 EDX bit positions. */
#define X86_CPUEXT_MMX_EXT (1<<22)
#define X86_CPUEXT_3DNOW_EXT (1<<30)
#define X86_CPUEXT_3DNOW (1<<31)

/* cpu_has_* helpers: compile-time 1 when the build's baseline ISA already
 * guarantees the feature, otherwise a runtime test of the feature mask. */
#ifdef __MMX__
#define cpu_has_mmx 1
#else
#define cpu_has_mmx (_mesa_x86_cpu_features & X86_FEATURE_MMX)
#endif

#define cpu_has_mmxext (_mesa_x86_cpu_features & X86_FEATURE_MMXEXT)

#if defined(__SSE__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1)) || defined(_M_X64)
#define cpu_has_xmm 1
#else
#define cpu_has_xmm (_mesa_x86_cpu_features & X86_FEATURE_XMM)
#endif

#if defined(__SSE2__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(_M_X64)
#define cpu_has_xmm2 1
#else
#define cpu_has_xmm2 (_mesa_x86_cpu_features & X86_FEATURE_XMM2)
#endif

#ifdef __3dNOW__
#define cpu_has_3dnow 1
#else
#define cpu_has_3dnow (_mesa_x86_cpu_features & X86_FEATURE_3DNOW)
#endif

#define cpu_has_3dnowext (_mesa_x86_cpu_features & X86_FEATURE_3DNOWEXT)

#ifdef __SSE4_1__
#define cpu_has_sse4_1 1
#else
#define cpu_has_sse4_1 (_mesa_x86_cpu_features & X86_FEATURE_SSE4_1)
#endif

#endif

View file

@ -1,687 +0,0 @@
/*
* (C) Copyright IBM Corporation 2004
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* \file read_rgba_span_x86.S
* Optimized routines to transfer pixel data from the framebuffer to a
* buffer in main memory.
*
* \author Ian Romanick <idr@us.ibm.com>
*/
/* Control flow enforcement support */
#ifdef HAVE_CET_H
#include <cet.h>
#else
#define _CET_ENDBR
#endif
.file "read_rgba_span_x86.S"
#if !defined(__MINGW32__) && !defined(__APPLE__) /* this one cries for assyntax.h */
/* Kevin F. Quinn 2nd July 2006
 * Replaced data segment constants with text-segment instructions.
 */
/* LOAD_MASK(mvins, m1, m2): load the 0xff00ff00 byte mask into m1 and the
 * 0x00ff0000 byte mask into m2 via stack pushes, using move instruction
 * 'mvins' (movq for MMX registers, movdqu for XMM). */
#define LOAD_MASK(mvins,m1,m2) \
	pushl	$0xff00ff00 ;\
	pushl	$0xff00ff00 ;\
	pushl	$0xff00ff00 ;\
	pushl	$0xff00ff00 ;\
	mvins	(%esp), m1 ;\
	pushl	$0x00ff0000 ;\
	pushl	$0x00ff0000 ;\
	pushl	$0x00ff0000 ;\
	pushl	$0x00ff0000 ;\
	mvins	(%esp), m2 ;\
	addl	$32, %esp

/* I implemented these as macros because they appear in several places,
 * and I've tweaked them a number of times.  I got tired of changing every
 * place they appear. :)
 */

/* Convert one BGRA pixel at (%ebx) to RGBA at (%ecx), advancing both
 * pointers by 4 bytes.  Clobbers %eax. */
#define DO_ONE_PIXEL() \
	movl	(%ebx), %eax ; \
	addl	$4, %ebx ; \
	bswap	%eax          /* ARGB -> BGRA */ ; \
	rorl	$8, %eax      /* BGRA -> ABGR */ ; \
	movl	%eax, (%ecx)  /* ABGR -> R, G, B, A */ ; \
	addl	$4, %ecx

/* Same conversion as DO_ONE_PIXEL, but leaves both pointers in place. */
#define DO_ONE_LAST_PIXEL() \
	movl	(%ebx), %eax ; \
	bswap	%eax          /* ARGB -> BGRA */ ; \
	rorl	$8, %eax      /* BGRA -> ABGR */ ; \
	movl	%eax, (%ecx)  /* ABGR -> R, G, B, A */ ;
/**
 * MMX optimized version of the BGRA8888_REV to RGBA copy routine.
 *
 * cdecl args: 4(%esp) = source, 8(%esp)... after the push below:
 * 8(%esp) = source pointer, 12(%esp) = destination, 16(%esp) = pixel count.
 *
 * \warning
 * This function assumes that the caller will issue the EMMS instruction
 * at the correct places.
 */
.globl _generic_read_RGBA_span_BGRA8888_REV_MMX
.hidden _generic_read_RGBA_span_BGRA8888_REV_MMX
	.type _generic_read_RGBA_span_BGRA8888_REV_MMX, @function
_generic_read_RGBA_span_BGRA8888_REV_MMX:
	_CET_ENDBR
	pushl	%ebx

#ifdef USE_INNER_EMMS
	emms
#endif
	LOAD_MASK(movq,%mm1,%mm2)

	movl	8(%esp), %ebx	/* source pointer */
	movl	16(%esp), %edx	/* number of pixels to copy */
	movl	12(%esp), %ecx	/* destination pointer */

	testl	%edx, %edx
	jle	.L20		/* Bail if there's nothing to do. */

	/* If the source is not 8-byte aligned, convert one pixel the
	 * scalar way first. */
	movl	%ebx, %eax

	negl	%eax
	sarl	$2, %eax
	andl	$1, %eax
	je	.L17

	subl	%eax, %edx
	DO_ONE_PIXEL()
.L17:

	/* Would it be faster to unroll this loop once and process 4 pixels
	 * per pass, instead of just two?
	 */

	movl	%edx, %eax
	shrl	%eax
	jmp	.L18

	/* Main loop: two pixels (8 bytes) per iteration. */
.L19:
	movq	(%ebx), %mm0
	addl	$8, %ebx

	/* These 9 instructions do what PSHUFB (if there were such an
	 * instruction) could do in 1. :(
	 */

	movq	%mm0, %mm3
	movq	%mm0, %mm4

	pand	%mm2, %mm3
	psllq	$16, %mm4
	psrlq	$16, %mm3
	pand	%mm2, %mm4

	pand	%mm1, %mm0
	por	%mm4, %mm3
	por	%mm3, %mm0

	movq	%mm0, (%ecx)
	addl	$8, %ecx
	subl	$1, %eax
.L18:
	jne	.L19

#ifdef USE_INNER_EMMS
	emms
#endif

	/* At this point there are either 1 or 0 pixels remaining to be
	 * converted.  Convert the last pixel, if needed.
	 */

	testl	$1, %edx
	je	.L20

	DO_ONE_LAST_PIXEL()

.L20:
	popl	%ebx
	ret
	.size _generic_read_RGBA_span_BGRA8888_REV_MMX, .-_generic_read_RGBA_span_BGRA8888_REV_MMX
/**
 * SSE optimized version of the BGRA8888_REV to RGBA copy routine.  SSE
 * instructions are only actually used to read data from the framebuffer.
 * In practice, the speed-up is pretty small.
 *
 * \todo
 * Do some more testing and determine if there's any reason to have this
 * function in addition to the MMX version.
 *
 * \warning
 * This function assumes that the caller will issue the EMMS instruction
 * at the correct places.
 */
.globl _generic_read_RGBA_span_BGRA8888_REV_SSE
.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE
	.type _generic_read_RGBA_span_BGRA8888_REV_SSE, @function
_generic_read_RGBA_span_BGRA8888_REV_SSE:
	_CET_ENDBR
	pushl	%esi
	pushl	%ebx
	pushl	%ebp

#ifdef USE_INNER_EMMS
	emms
#endif

	LOAD_MASK(movq,%mm1,%mm2)

	movl	16(%esp), %ebx	/* source pointer */
	movl	24(%esp), %edx	/* number of pixels to copy */
	movl	20(%esp), %ecx	/* destination pointer */

	testl	%edx, %edx
	jle	.L35		/* Bail if there's nothing to do. */

	/* Carve out a 16-byte-aligned scratch area on the stack for the
	 * XMM -> MMX round trip below; %ebp preserves the original %esp. */
	movl	%esp, %ebp
	subl	$16, %esp
	andl	$0xfffffff0, %esp

	/* %esi = number of lead-in pixels needed to reach 16-byte source
	 * alignment (capped at the total pixel count). */
	movl	%ebx, %eax
	movl	%edx, %esi

	negl	%eax
	andl	$15, %eax
	sarl	$2, %eax
	cmpl	%edx, %eax
	cmovle	%eax, %esi

	subl	%esi, %edx

	testl	$1, %esi
	je	.L32

	DO_ONE_PIXEL()
.L32:
	testl	$2, %esi
	je	.L31

	/* Convert a two-pixel (8-byte) chunk with the MMX shuffle. */
	movq	(%ebx), %mm0
	addl	$8, %ebx

	movq	%mm0, %mm3
	movq	%mm0, %mm4

	pand	%mm2, %mm3
	psllq	$16, %mm4
	psrlq	$16, %mm3
	pand	%mm2, %mm4

	pand	%mm1, %mm0
	por	%mm4, %mm3
	por	%mm3, %mm0

	movq	%mm0, (%ecx)
	addl	$8, %ecx
.L31:
	movl	%edx, %eax
	shrl	$2, %eax
	jmp	.L33

	/* Main loop: four pixels (16 bytes) per iteration, fetched with an
	 * aligned SSE load and shuffled in the MMX units. */
.L34:
	movaps	(%ebx), %xmm0
	addl	$16, %ebx

	/* This would be so much better if we could just move directly from
	 * an SSE register to an MMX register.  Unfortunately, that
	 * functionality wasn't introduced until SSE2 with the MOVDQ2Q
	 * instruction.
	 */

	movaps	%xmm0, (%esp)
	movq	(%esp), %mm0
	movq	8(%esp), %mm5

	movq	%mm0, %mm3
	movq	%mm0, %mm4
	movq	%mm5, %mm6
	movq	%mm5, %mm7

	pand	%mm2, %mm3
	pand	%mm2, %mm6

	psllq	$16, %mm4
	psllq	$16, %mm7

	psrlq	$16, %mm3
	psrlq	$16, %mm6

	pand	%mm2, %mm4
	pand	%mm2, %mm7

	pand	%mm1, %mm0
	pand	%mm1, %mm5

	por	%mm4, %mm3
	por	%mm7, %mm6

	por	%mm3, %mm0
	por	%mm6, %mm5

	movq	%mm0, (%ecx)
	movq	%mm5, 8(%ecx)
	addl	$16, %ecx
	subl	$1, %eax
.L33:
	jne	.L34

#ifdef USE_INNER_EMMS
	emms
#endif
	/* Release the aligned scratch area. */
	movl	%ebp, %esp

	/* At this point there are either [0, 3] pixels remaining to be
	 * converted.
	 */

	testl	$2, %edx
	je	.L36

	movq	(%ebx), %mm0
	addl	$8, %ebx

	movq	%mm0, %mm3
	movq	%mm0, %mm4

	pand	%mm2, %mm3
	psllq	$16, %mm4
	psrlq	$16, %mm3
	pand	%mm2, %mm4

	pand	%mm1, %mm0
	por	%mm4, %mm3
	por	%mm3, %mm0

	movq	%mm0, (%ecx)
	addl	$8, %ecx

.L36:
	testl	$1, %edx
	je	.L35

	DO_ONE_LAST_PIXEL()
.L35:
	popl	%ebp
	popl	%ebx
	popl	%esi
	ret
	.size _generic_read_RGBA_span_BGRA8888_REV_SSE, .-_generic_read_RGBA_span_BGRA8888_REV_SSE
/**
 * SSE2 optimized version of the BGRA8888_REV to RGBA copy routine.
 *
 * void _generic_read_RGBA_span_BGRA8888_REV_SSE2(const uint32_t *src,
 *                                                uint32_t *dst, int count)
 *
 * Pure-SSE2 variant: no MMX registers, so no EMMS is needed.  Processes
 * 4 pixels (16 bytes) per main-loop iteration entirely in xmm registers,
 * with byte-wise pslldq/psrldq shifts performing the B<->R exchange.
 *
 * NOTE(review): LOAD_MASK / DO_ONE_PIXEL / DO_ONE_LAST_PIXEL are macros
 * defined earlier in this file (outside this view); %xmm1/%xmm2 are assumed
 * to hold the keep/swap byte-select masks -- verify against the macros.
 */
.text
.globl _generic_read_RGBA_span_BGRA8888_REV_SSE2
.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE2
.type _generic_read_RGBA_span_BGRA8888_REV_SSE2, @function
_generic_read_RGBA_span_BGRA8888_REV_SSE2:
_CET_ENDBR
/* Two pushes below move the cdecl arguments to 12/16/20(%esp). */
pushl %esi
pushl %ebx
LOAD_MASK(movdqu,%xmm1,%xmm2)
movl 12(%esp), %ebx /* source pointer */
movl 20(%esp), %edx /* number of pixels to copy */
movl 16(%esp), %ecx /* destination pointer */
movl %ebx, %eax
movl %edx, %esi
testl %edx, %edx
jle .L46 /* Bail if there's nothing to do. */
/* If the source pointer isn't a multiple of 16 we have to process
 * a few pixels the "slow" way to get the address aligned for
 * the SSE fetch instructions.
 * %esi = head-pixel count (0..3); %edx = pixels left afterwards.
 */
negl %eax
andl $15, %eax
sarl $2, %eax /* misaligned bytes -> misaligned pixels */
cmpl %edx, %eax
cmovbe %eax, %esi /* clamp: never more head pixels than exist */
subl %esi, %edx
testl $1, %esi
je .L41
DO_ONE_PIXEL()
.L41:
/* Two head pixels at once: 8-byte load, swizzle, 8-byte store. */
testl $2, %esi
je .L40
movq (%ebx), %xmm0
addl $8, %ebx
movdqa %xmm0, %xmm3
movdqa %xmm0, %xmm4
andps %xmm1, %xmm0 /* keep the unswapped byte lanes */
andps %xmm2, %xmm3
pslldq $2, %xmm4 /* 2-byte shifts exchange the masked lanes */
psrldq $2, %xmm3
andps %xmm2, %xmm4
orps %xmm4, %xmm3
orps %xmm3, %xmm0
movq %xmm0, (%ecx)
addl $8, %ecx
.L40:
/* Would it be worth having a specialized version of this loop for
 * the case where the destination is 16-byte aligned?  That version
 * would be identical except that it could use movdqa instead of
 * movdqu.
 */
/* Main loop: 4 pixels per iteration; %eax = remaining count / 4. */
movl %edx, %eax
shrl $2, %eax
jmp .L42
.L43:
movdqa (%ebx), %xmm0 /* aligned load, thanks to the prologue */
addl $16, %ebx
movdqa %xmm0, %xmm3
movdqa %xmm0, %xmm4
andps %xmm1, %xmm0
andps %xmm2, %xmm3
pslldq $2, %xmm4
psrldq $2, %xmm3
andps %xmm2, %xmm4
orps %xmm4, %xmm3
orps %xmm3, %xmm0
movdqu %xmm0, (%ecx) /* destination may be unaligned */
addl $16, %ecx
subl $1, %eax
.L42:
jne .L43
/* There may be up to 3 pixels remaining to be copied.  Take care
 * of them now.  We do the 2 pixel case first because the data
 * will be aligned.
 */
testl $2, %edx
je .L47
movq (%ebx), %xmm0
addl $8, %ebx
movdqa %xmm0, %xmm3
movdqa %xmm0, %xmm4
andps %xmm1, %xmm0
andps %xmm2, %xmm3
pslldq $2, %xmm4
psrldq $2, %xmm3
andps %xmm2, %xmm4
orps %xmm4, %xmm3
orps %xmm3, %xmm0
movq %xmm0, (%ecx)
addl $8, %ecx
.L47:
testl $1, %edx
je .L46
DO_ONE_LAST_PIXEL()
.L46:
popl %ebx
popl %esi
ret
.size _generic_read_RGBA_span_BGRA8888_REV_SSE2, .-_generic_read_RGBA_span_BGRA8888_REV_SSE2
/* Constants for the RGB565 -> RGBA expansion below.  Each *_L/*_H pair is
 * pushed onto the stack to form one 64-bit MMX constant (low dword first).
 */
#define MASK_565_L 0x07e0f800
#define MASK_565_H 0x0000001f
/* Setting SCALE_ADJUST to 5 gives a perfect match with the
 * classic C implementation in Mesa.  Setting SCALE_ADJUST
 * to 0 is slightly faster but at a small cost to accuracy.
 */
#define SCALE_ADJUST 5
#if SCALE_ADJUST == 5
#define PRESCALE_L 0x00100001
#define PRESCALE_H 0x00000200
#define SCALE_L 0x40C620E8
#define SCALE_H 0x0000839d
#elif SCALE_ADJUST == 0
#define PRESCALE_L 0x00200001
#define PRESCALE_H 0x00000800
#define SCALE_L 0x01040108
#define SCALE_H 0x00000108
#else
#error SCALE_ADJUST must either be 5 or 0.
#endif
#define ALPHA_L 0x00000000
#define ALPHA_H 0x00ff0000
/**
 * MMX optimized version of the RGB565 to RGBA copy routine.
 *
 * void _generic_read_RGBA_span_RGB565_MMX(const uint16_t *src,
 *                                         uint32_t *dst, int count)
 *
 * Per pixel: isolate each 5/6/5 component into its own 16-bit word
 * (mm5 mask), pre-scale (mm6), multiply up to [0, 0x00ff0000] (mm7),
 * force alpha to 0xff (mm3), then pack back down to bytes.
 * Main loop handles 4 pixels per iteration; the tail handles 2, then 1.
 * Per the file's convention the caller issues EMMS unless USE_INNER_EMMS
 * is defined.  Requires pshufw/pmulhuw (MMX extensions).
 */
.text
.globl _generic_read_RGBA_span_RGB565_MMX
.hidden _generic_read_RGBA_span_RGB565_MMX
.type _generic_read_RGBA_span_RGB565_MMX, @function
_generic_read_RGBA_span_RGB565_MMX:
_CET_ENDBR
#ifdef USE_INNER_EMMS
emms
#endif
/* No registers are pushed, so the cdecl arguments sit at 4/8/12(%esp). */
movl 4(%esp), %eax /* source pointer */
movl 8(%esp), %edx /* destination pointer */
movl 12(%esp), %ecx /* number of pixels to copy */
/* Materialize the four 64-bit constants by pushing their halves and
 * loading through the stack; %esp is restored immediately afterwards.
 */
pushl $MASK_565_H
pushl $MASK_565_L
movq (%esp), %mm5 /* component-isolation mask */
pushl $PRESCALE_H
pushl $PRESCALE_L
movq (%esp), %mm6 /* per-component pre-scale factors */
pushl $SCALE_H
pushl $SCALE_L
movq (%esp), %mm7 /* per-component scale factors */
pushl $ALPHA_H
pushl $ALPHA_L
movq (%esp), %mm3 /* constant alpha = 0xff */
addl $32,%esp
sarl $2, %ecx /* %ecx = pixel count / 4 (loop trips) */
jl .L01 /* Bail early if the count is negative. */
jmp .L02
.L03:
/* Fetch 4 RGB565 pixels into %mm4.  Distribute the first and
 * second pixels into the four words of %mm0 and %mm2.
 */
movq (%eax), %mm4
addl $8, %eax
pshufw $0x00, %mm4, %mm0
pshufw $0x55, %mm4, %mm2
/* Mask the pixels so that each word of each register contains only
 * one color component.
 */
pand %mm5, %mm0
pand %mm5, %mm2
/* Adjust the component values so that they are as small as possible,
 * but large enough so that we can multiply them by an unsigned 16-bit
 * number and get a value as large as 0x00ff0000.
 */
pmullw %mm6, %mm0
pmullw %mm6, %mm2
#if SCALE_ADJUST > 0
psrlw $SCALE_ADJUST, %mm0
psrlw $SCALE_ADJUST, %mm2
#endif
/* Scale the input component values to be on the range
 * [0, 0x00ff0000].  This is the real magic of the whole routine.
 */
pmulhuw %mm7, %mm0
pmulhuw %mm7, %mm2
/* Always set the alpha value to 0xff.
 */
por %mm3, %mm0
por %mm3, %mm2
/* Pack the 16-bit values to 8-bit values and store the converted
 * pixel data.
 */
packuswb %mm2, %mm0
movq %mm0, (%edx)
addl $8, %edx
/* Repeat for the third and fourth pixels of %mm4. */
pshufw $0xaa, %mm4, %mm0
pshufw $0xff, %mm4, %mm2
pand %mm5, %mm0
pand %mm5, %mm2
pmullw %mm6, %mm0
pmullw %mm6, %mm2
#if SCALE_ADJUST > 0
psrlw $SCALE_ADJUST, %mm0
psrlw $SCALE_ADJUST, %mm2
#endif
pmulhuw %mm7, %mm0
pmulhuw %mm7, %mm2
por %mm3, %mm0
por %mm3, %mm2
packuswb %mm2, %mm0
movq %mm0, (%edx)
addl $8, %edx
subl $1, %ecx
.L02:
jne .L03
/* At this point there can be at most 3 pixels left to process.  If
 * there is either 2 or 3 left, process 2.
 */
movl 12(%esp), %ecx /* reload the original pixel count */
testl $0x02, %ecx
je .L04
movd (%eax), %mm4 /* 4-byte load = two RGB565 pixels */
addl $4, %eax
pshufw $0x00, %mm4, %mm0
pshufw $0x55, %mm4, %mm2
pand %mm5, %mm0
pand %mm5, %mm2
pmullw %mm6, %mm0
pmullw %mm6, %mm2
#if SCALE_ADJUST > 0
psrlw $SCALE_ADJUST, %mm0
psrlw $SCALE_ADJUST, %mm2
#endif
pmulhuw %mm7, %mm0
pmulhuw %mm7, %mm2
por %mm3, %mm0
por %mm3, %mm2
packuswb %mm2, %mm0
movq %mm0, (%edx)
addl $8, %edx
.L04:
/* At this point there can be at most 1 pixel left to process.
 * Process it if needed.
 */
testl $0x01, %ecx
je .L01
movzwl (%eax), %ecx /* 2-byte load = one RGB565 pixel */
movd %ecx, %mm4
pshufw $0x00, %mm4, %mm0
pand %mm5, %mm0
pmullw %mm6, %mm0
#if SCALE_ADJUST > 0
psrlw $SCALE_ADJUST, %mm0
#endif
pmulhuw %mm7, %mm0
por %mm3, %mm0
packuswb %mm0, %mm0
movd %mm0, (%edx) /* 4-byte store: one RGBA pixel */
.L01:
#ifdef USE_INNER_EMMS
emms
#endif
ret
#endif /* !defined(__MINGW32__) && !defined(__APPLE__) */
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif