Major audit of all Mesa's x86 assembly code. This round is basically
general cleanups - more to come.

Added P6 architecture timing to debug_xform routines.  Still need to add
test_all_vertex_functions test for the v16 asm.  Dynamic reconfiguration
of counter overhead for more accurate benchmarking.
This commit is contained in:
Gareth Hughes 2000-10-23 00:16:28 +00:00
parent e188b6e1f1
commit fc2427e81b
16 changed files with 961 additions and 306 deletions

View file

@ -1,4 +1,4 @@
/* $Id: blend.c,v 1.19 2000/10/19 18:08:05 brianp Exp $ */
/* $Id: blend.c,v 1.20 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
@ -826,7 +826,7 @@ blend_general( GLcontext *ctx, GLuint n, const GLubyte mask[],
#if defined(USE_MMX_ASM)
#include "X86/mmx.h"
#include "X86/common_x86asm.h"
#include "X86/common_x86_asm.h"
#endif
@ -846,8 +846,8 @@ static void set_blend_function( GLcontext *ctx )
/* Hmm. A table here would have 12^4 == way too many entries.
* Provide a hook for MMX instead.
*/
if (gl_x86_cpu_features & GL_CPU_MMX) {
gl_mmx_set_blend_function (ctx);
if ( cpu_has_mmx ) {
gl_mmx_set_blend_function( ctx );
}
else
#endif

View file

@ -1,4 +1,4 @@
/* $Id: context.c,v 1.94 2000/10/21 00:02:47 brianp Exp $ */
/* $Id: context.c,v 1.95 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
@ -454,6 +454,7 @@ one_time_init( void )
gl_init_translate();
gl_init_vbrender();
gl_init_vbxform();
gl_init_vertices();
if (getenv("MESA_DEBUG")) {
_glapi_noop_enable_warnings(GL_TRUE);
@ -884,7 +885,7 @@ init_attrib_groups( GLcontext *ctx )
ctx->Current.Primitive = (GLenum) (GL_POLYGON + 1);
ctx->Current.Flag = (VERT_NORM |
VERT_INDEX |
VERT_INDEX |
VERT_RGBA |
VERT_EDGE |
VERT_TEX0_1 |
@ -2114,4 +2115,4 @@ GLenum gl_reduce_prim[GL_POLYGON+1] = {
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
};
};

View file

@ -1,4 +1,4 @@
/* $Id: 3dnow.c,v 1.7 2000/09/17 21:12:40 gareth Exp $ */
/* $Id: 3dnow.c,v 1.8 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
@ -24,88 +24,75 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* 3DNow! optimizations contributed by
* Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
*/
#if defined(USE_3DNOW_ASM) && defined(USE_X86_ASM)
#include "3dnow.h"
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "glheader.h"
#include "context.h"
#include "types.h"
#include "xform.h"
#include "vertices.h"
#include "xform.h"
#include "3dnow.h"
#ifdef DEBUG
#include "debug_xform.h"
#endif
#define XFORM_ARGS GLvector4f *to_vec, \
const GLfloat m[16], \
const GLvector4f *from_vec, \
const GLubyte *mask, \
#define XFORM_ARGS GLvector4f *to_vec, \
const GLfloat m[16], \
const GLvector4f *from_vec, \
const GLubyte *mask, \
const GLubyte flag
#define DECLARE_XFORM_GROUP( pfx, sz, masked ) \
extern void _ASMAPI gl_##pfx##_transform_points##sz##_general_##masked( XFORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_points##sz##_identity_##masked( XFORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_no_rot_##masked( XFORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_points##sz##_perspective_##masked( XFORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_##masked( XFORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_no_rot_##masked( XFORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_##masked( XFORM_ARGS );
#define DECLARE_XFORM_GROUP( pfx, v, masked ) \
extern void _ASMAPI gl_##pfx##_transform_points##v##_general_##masked(XFORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_points##v##_identity_##masked(XFORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_points##v##_3d_no_rot_##masked(XFORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_points##v##_perspective_##masked(XFORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_points##v##_2d_##masked(XFORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_points##v##_2d_no_rot_##masked(XFORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_points##v##_3d_##masked(XFORM_ARGS);
#define ASSIGN_XFORM_GROUP( pfx, cma, sz, masked ) \
gl_transform_tab[cma][sz][MATRIX_GENERAL] = \
gl_##pfx##_transform_points##sz##_general_##masked; \
gl_transform_tab[cma][sz][MATRIX_IDENTITY] = \
gl_##pfx##_transform_points##sz##_identity_##masked; \
gl_transform_tab[cma][sz][MATRIX_3D_NO_ROT] = \
gl_##pfx##_transform_points##sz##_3d_no_rot_##masked; \
gl_transform_tab[cma][sz][MATRIX_PERSPECTIVE] = \
gl_##pfx##_transform_points##sz##_perspective_##masked; \
gl_transform_tab[cma][sz][MATRIX_2D] = \
gl_##pfx##_transform_points##sz##_2d_##masked; \
gl_transform_tab[cma][sz][MATRIX_2D_NO_ROT] = \
gl_##pfx##_transform_points##sz##_2d_no_rot_##masked; \
gl_transform_tab[cma][sz][MATRIX_3D] = \
gl_##pfx##_transform_points##sz##_3d_##masked;
#define ASSIGN_XFORM_GROUP( pfx, cma, vsize, masked ) \
gl_transform_tab[cma][vsize][MATRIX_GENERAL] = \
gl_##pfx##_transform_points##vsize##_general_##masked; \
gl_transform_tab[cma][vsize][MATRIX_IDENTITY] = \
gl_##pfx##_transform_points##vsize##_identity_##masked; \
gl_transform_tab[cma][vsize][MATRIX_3D_NO_ROT] = \
gl_##pfx##_transform_points##vsize##_3d_no_rot_##masked; \
gl_transform_tab[cma][vsize][MATRIX_PERSPECTIVE] = \
gl_##pfx##_transform_points##vsize##_perspective_##masked; \
gl_transform_tab[cma][vsize][MATRIX_2D] = \
gl_##pfx##_transform_points##vsize##_2d_##masked; \
gl_transform_tab[cma][vsize][MATRIX_2D_NO_ROT] = \
gl_##pfx##_transform_points##vsize##_2d_no_rot_##masked; \
gl_transform_tab[cma][vsize][MATRIX_3D] = \
gl_##pfx##_transform_points##vsize##_3d_##masked;
#define NORM_ARGS const GLmatrix *mat, \
GLfloat scale, \
const GLvector3f *in, \
const GLfloat *lengths, \
const GLubyte mask[], \
#define NORM_ARGS const GLmatrix *mat, \
GLfloat scale, \
const GLvector3f *in, \
const GLfloat *lengths, \
const GLubyte mask[], \
GLvector3f *dest
#define DECLARE_NORM_GROUP( pfx, masked ) \
extern void _ASMAPI gl_##pfx##_rescale_normals_##masked(NORM_ARGS); \
extern void _ASMAPI gl_##pfx##_normalize_normals_##masked(NORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_normals_##masked(NORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_normals_no_rot_##masked(NORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_rescale_normals_##masked(NORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_rescale_normals_no_rot_##masked(NORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_normalize_normals_##masked(NORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_normalize_normals_no_rot_##masked(NORM_ARGS);
extern void _ASMAPI gl_##pfx##_rescale_normals_##masked( NORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_normalize_normals_##masked( NORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_normals_##masked( NORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_normals_no_rot_##masked( NORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_rescale_normals_##masked( NORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_rescale_normals_no_rot_##masked( NORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_normalize_normals_##masked( NORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_normalize_normals_no_rot_##masked( NORM_ARGS );
#define ASSIGN_NORM_GROUP( pfx, cma, masked ) \
@ -127,24 +114,7 @@
gl_##pfx##_transform_normalize_normals_no_rot_##masked;
extern void _ASMAPI gl_3dnow_project_vertices( GLfloat *first,
GLfloat *last,
const GLfloat *m,
GLuint stride );
extern void _ASMAPI gl_3dnow_project_clipped_vertices( GLfloat *first,
GLfloat *last,
const GLfloat *m,
GLuint stride,
const GLubyte *clipmask );
extern void _ASMAPI gl_v16_3dnow_general_xform( GLfloat *first_vert,
const GLfloat *m,
const GLfloat *src,
GLuint src_stride,
GLuint count );
#ifdef USE_3DNOW_ASM
DECLARE_XFORM_GROUP( 3dnow, 1, raw )
DECLARE_XFORM_GROUP( 3dnow, 2, raw )
DECLARE_XFORM_GROUP( 3dnow, 3, raw )
@ -159,8 +129,28 @@ DECLARE_NORM_GROUP( 3dnow, raw )
/*DECLARE_NORM_GROUP( 3dnow, masked )*/
void gl_init_3dnow_asm_transforms( void )
extern void _ASMAPI gl_v16_3dnow_general_xform( GLfloat *first_vert,
const GLfloat *m,
const GLfloat *src,
GLuint src_stride,
GLuint count );
extern void _ASMAPI gl_3dnow_project_vertices( GLfloat *first,
GLfloat *last,
const GLfloat *m,
GLuint stride );
extern void _ASMAPI gl_3dnow_project_clipped_vertices( GLfloat *first,
GLfloat *last,
const GLfloat *m,
GLuint stride,
const GLubyte *clipmask );
#endif
void gl_init_3dnow_transform_asm( void )
{
#ifdef USE_3DNOW_ASM
ASSIGN_XFORM_GROUP( 3dnow, 0, 1, raw );
ASSIGN_XFORM_GROUP( 3dnow, 0, 2, raw );
ASSIGN_XFORM_GROUP( 3dnow, 0, 3, raw );
@ -178,21 +168,18 @@ void gl_init_3dnow_asm_transforms( void )
gl_test_all_transform_functions( "3DNow!" );
gl_test_all_normal_transform_functions( "3DNow!" );
#endif
/* Hook in some stuff for vertices.c.
*/
gl_xform_points3_v16_general = gl_v16_3dnow_general_xform;
gl_project_v16 = gl_3dnow_project_vertices;
gl_project_clipped_v16 = gl_3dnow_project_clipped_vertices;
}
#else
/* silence compiler warning */
extern void _mesa_3dnow_dummy_function( void );
void _mesa_3dnow_dummy_function( void )
{
}
#endif
}
/*
 * Point the vertices.c function-pointer hooks (v16 projection and the
 * general points3 transform) at their 3DNow! assembly implementations.
 *
 * The body is compiled only when USE_3DNOW_ASM is defined; otherwise
 * this is an empty function.
 */
void gl_init_3dnow_vertex_asm( void )
{
#if defined(USE_3DNOW_ASM)
   /* Projection hooks: clipped and unclipped variants. */
   gl_project_clipped_v16 = gl_3dnow_project_clipped_vertices;
   gl_project_v16 = gl_3dnow_project_vertices;

   /* General-matrix transform hook for points3 -> v16 vertices. */
   gl_xform_points3_v16_general = gl_v16_3dnow_general_xform;

#if 0
   /* Self-test of the vertex functions; currently compiled out. */
   gl_test_all_vertex_functions( "3DNow!" );
#endif
#endif
}

View file

@ -1,21 +1,21 @@
/* $Id: 3dnow.h,v 1.1 1999/08/19 00:55:42 jtg Exp $ */
/* $Id: 3dnow.h,v 1.2 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.1
*
* Version: 3.5
*
* Copyright (C) 1999 Brian Paul All Rights Reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
@ -24,72 +24,17 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* 3DNow! optimizations contributed by
* Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
*/
#ifndef _3dnow_h
#define _3dnow_h
#ifndef __3DNOW_H__
#define __3DNOW_H__
#include "xform.h"
void gl_init_3dnow_asm_transforms (void);
#if 0
GLvector4f *gl_project_points( GLvector4f *proj_vec,
const GLvector4f *clip_vec )
{
__asm__ (
" femms \n"
" \n"
" movq (%0), %%mm0 # x1 | x0 \n"
" movq 8(%0), %%mm1 # oow | x2 \n"
" \n"
"1: movq %%mm1, %%mm2 # oow | x2 \n"
" addl %2, %0 # next point \n"
" \n"
" punpckhdq %%mm2, %%mm2 # oow | oow \n"
" addl $16, %1 # next point \n"
" \n"
" pfrcp %%mm2, %%mm3 # 1/oow | 1/oow \n"
" decl %3 \n"
" \n"
" pfmul %%mm3, %%mm0 # x1/oow | x0/oow \n"
" movq %%mm0, -16(%1) # write r0, r1 \n"
" \n"
" pfmul %%mm3, %%mm1 # 1 | x2/oow \n"
" movq (%0), %%mm0 # x1 | x0 \n"
" \n"
" movd %%mm1, 8(%1) # write r2 \n"
" movd %%mm3, 12(%1) # write r3 \n"
" \n"
" movq 8(%0), %%mm1 # oow | x2 \n"
" ja 1b \n"
" \n"
" femms \n"
" "
::"a" (clip_vec->start),
"c" (proj_vec->start),
"g" (clip_vec->stride),
"d" (clip_vec->count)
);
proj_vec->flags |= VEC_SIZE_4;
proj_vec->size = 3;
proj_vec->count = clip_vec->count;
return proj_vec;
}
#endif
/* Install the 3DNow! transform routines into the transform tables
 * (implemented in 3dnow.c).
 */
void gl_init_3dnow_transform_asm( void );
/* Install the 3DNow! v16 vertex transform/projection hooks
 * (implemented in 3dnow.c).
 */
void gl_init_3dnow_vertex_asm( void );
#endif

View file

@ -1,4 +1,4 @@
/* $Id: assyntax.h,v 1.15 2000/09/18 22:49:04 gareth Exp $ */
/* $Id: assyntax.h,v 1.16 2000/10/23 00:16:28 gareth Exp $ */
#ifndef __ASSYNTAX_H__
#define __ASSYNTAX_H__

76
src/mesa/x86/clip_args.h Normal file
View file

@ -0,0 +1,76 @@
/* $Id: clip_args.h,v 1.2 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Clip test function interface for assembly code. Simply define
* FRAME_OFFSET to the number of bytes pushed onto the stack before
* using the ARG_* argument macros.
*
* Gareth Hughes <gareth@valinux.com>
*/
#ifndef __CLIP_ARGS_H__
#define __CLIP_ARGS_H__
/* Byte offsets of the GLvector4f fields, for addressing them from
 * assembly code.
 * NOTE(review): the 4-byte spacing presumably assumes 32-bit pointers and
 * 32-bit integer fields -- confirm against the GLvector4f declaration.
 */
#define V4F_DATA 0
#define V4F_START 4
#define V4F_COUNT 8
#define V4F_STRIDE 12
#define V4F_SIZE 16
#define V4F_FLAGS 20
/* GLvector4f flags.  Each value is a mask with the low N bits set
 * (N = 1..4).
 * NOTE(review): presumably these mirror the C-side definitions of the
 * same names and must be kept in sync with them -- confirm.
 */
#define VEC_SIZE_1 1
#define VEC_SIZE_2 3
#define VEC_SIZE_3 7
#define VEC_SIZE_4 15
/*
 * Offsets for clip_func arguments
 *
 * typedef GLvector4f *(*clip_func)( GLvector4f *vClip,
 *                                   GLvector4f *vProj,
 *                                   GLubyte clipMask[],
 *                                   GLubyte *orMask,
 *                                   GLubyte *andMask );
 *
 * The OFFSET_* values follow the argument order above (SOURCE = vClip,
 * DEST = vProj, CLIP = clipMask, OR = orMask, AND = andMask) in 4-byte
 * steps.
 * NOTE(review): the first offset of 4 presumably skips the return address
 * at the top of the stack on function entry -- confirm.
 */
#define OFFSET_SOURCE 4
#define OFFSET_DEST 8
#define OFFSET_CLIP 12
#define OFFSET_OR 16
#define OFFSET_AND 20
/* Argument operands relative to ESP.  FRAME_OFFSET must be defined by the
 * including file as the number of bytes pushed onto the stack before the
 * ARG_* macros are used.  REGOFF is not defined in this header.
 */
#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP)
#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
#define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP)
#define ARG_OR REGOFF(FRAME_OFFSET+OFFSET_OR, ESP)
#define ARG_AND REGOFF(FRAME_OFFSET+OFFSET_AND, ESP)
#endif

View file

@ -1,21 +1,21 @@
/* $Id: common_x86.c,v 1.6 2000/01/25 17:04:47 brianp Exp $ */
/* $Id: common_x86.c,v 1.7 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.3
*
* Version: 3.5
*
* Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
@ -26,81 +26,102 @@
/*
* Check CPU capabilities & initialize optimized funtions for this particular
* processor.
* Check CPU capabilities & initialize optimized functions for this particular
* processor.
*
* Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
* Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the
* new Katmai functions
* Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
* Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the
* new Katmai functions.
*/
#include <stdlib.h>
#include <stdio.h>
#include "common_x86asm.h"
#include "common_x86_asm.h"
int gl_x86_cpu_features = 0;
static void message(const char *msg)
/* No reason for this to be public.
*/
extern int gl_identify_x86_cpu_features( void );
static void message( const char *msg )
{
if (getenv("MESA_DEBUG"))
fprintf(stderr, "%s\n", msg);
if ( getenv( "MESA_DEBUG" ) ) {
fprintf( stderr, "%s\n", msg );
}
}
void gl_init_all_x86_asm (void)
void gl_init_all_x86_transform_asm( void )
{
#ifdef USE_X86_ASM
gl_x86_cpu_features = gl_identify_x86_cpu_features ();
gl_x86_cpu_features |= GL_CPU_AnyX86;
gl_x86_cpu_features = gl_identify_x86_cpu_features();
if (getenv("MESA_NO_ASM") != 0)
if ( getenv( "MESA_NO_ASM" ) ) {
gl_x86_cpu_features = 0;
if (gl_x86_cpu_features & GL_CPU_GenuineIntel) {
message("GenuineIntel cpu detected.");
}
if (gl_x86_cpu_features) {
gl_init_x86_asm_transforms ();
if ( gl_x86_cpu_features ) {
gl_init_x86_transform_asm();
}
#ifdef USE_MMX_ASM
if (gl_x86_cpu_features & GL_CPU_MMX) {
char *s = getenv( "MESA_NO_MMX" );
if (s == NULL) {
message("MMX cpu detected.");
if ( cpu_has_mmx ) {
if ( getenv( "MESA_NO_MMX" ) == 0 ) {
message( "MMX cpu detected." );
} else {
gl_x86_cpu_features &= (~GL_CPU_MMX);
gl_x86_cpu_features &= ~(X86_FEATURE_MMX);
}
}
#endif
#ifdef USE_3DNOW_ASM
if (gl_x86_cpu_features & GL_CPU_3Dnow) {
char *s = getenv( "MESA_NO_3DNOW" );
if (s == NULL) {
message("3Dnow cpu detected.");
gl_init_3dnow_asm_transforms ();
if ( cpu_has_3dnow ) {
if ( getenv( "MESA_NO_3DNOW" ) == 0 ) {
message( "3Dnow cpu detected." );
gl_init_3dnow_transform_asm();
} else {
gl_x86_cpu_features &= (~GL_CPU_3Dnow);
gl_x86_cpu_features &= ~(X86_FEATURE_3DNOW);
}
}
#endif
#ifdef USE_KATMAI_ASM
if (gl_x86_cpu_features & GL_CPU_Katmai) {
char *s = getenv( "MESA_NO_KATMAI" );
if (s == NULL) {
message("Katmai cpu detected.");
gl_init_katmai_asm_transforms ();
if ( cpu_has_xmm ) {
if ( getenv( "MESA_NO_KATMAI" ) == 0 ) {
message( "Katmai cpu detected." );
gl_init_katmai_transform_asm();
} else {
gl_x86_cpu_features &= (~GL_CPU_Katmai);
gl_x86_cpu_features &= ~(X86_FEATURE_XMM);
}
}
#endif
#endif
}
/*
 * Install the optimized vertex routines for every instruction-set
 * extension that is both compiled in and reported by the CPU.
 *
 * gl_init_all_x86_transform_asm() must have been called first: it is
 * the function that fills in gl_x86_cpu_features, which is only read
 * here.  The MESA_NO_3DNOW and MESA_NO_KATMAI environment variables
 * veto the corresponding extension.
 */
void gl_init_all_x86_vertex_asm( void )
{
#if defined(USE_X86_ASM)
   /* Any reported feature bit enables the generic x86 vertex code. */
   if ( gl_x86_cpu_features != 0 ) {
      gl_init_x86_vertex_asm();
   }

#if defined(USE_3DNOW_ASM)
   if ( cpu_has_3dnow ) {
      /* The environment is consulted only when the CPU has 3DNow!. */
      if ( getenv( "MESA_NO_3DNOW" ) == NULL ) {
         gl_init_3dnow_vertex_asm();
      }
   }
#endif

#if defined(USE_KATMAI_ASM)
   if ( cpu_has_xmm ) {
      /* The environment is consulted only when the CPU has XMM. */
      if ( getenv( "MESA_NO_KATMAI" ) == NULL ) {
         gl_init_katmai_vertex_asm();
      }
   }
#endif
#endif
}

View file

@ -0,0 +1,152 @@
/* $Id: common_x86_asm.S,v 1.2 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Check extended CPU capabilities. Now just returns the raw CPUID
* feature information, allowing the higher level code to interpret the
* results.
*
* Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
*
* Cleaned up and simplified by Gareth Hughes <gareth@valinux.com>
*/
#include "assyntax.h"
#include "common_x86_features.h"
/* Intel vendor string ("GenuineIntel"), split into three 32-bit words.
 * Each constant holds four ASCII characters with the first character in
 * the least significant byte.  The code below compares them against
 * EBX, EDX and ECX (in that order) after CPUID function 0.
 */
#define GENU 0x756e6547 /* "Genu" */
#define INEI 0x49656e69 /* "ineI" */
#define NTEL 0x6c65746e /* "ntel" */
/* AMD vendor string ("AuthenticAMD"), encoded the same way and compared
 * against EBX, EDX and ECX (in that order).
 */
#define AUTH 0x68747541 /* "Auth" */
#define ENTI 0x69746e65 /* "enti" */
#define CAMD 0x444d4163 /* "cAMD" */
SEG_DATA
/* We might want to print out some useful messages.
 * NOTE(review): neither string is referenced by the code visible in this
 * file; they appear to be kept for future diagnostic output.
 */
LLBL( found_intel ): STRING( "Genuine Intel processor found\n\0" )
LLBL( found_amd ): STRING( "Authentic AMD processor found\n\0" )
SEG_TEXT
ALIGNTEXT4
GLOBL GLNAME( gl_identify_x86_cpu_features )
/*
 * int gl_identify_x86_cpu_features( void )
 *
 * Returns the raw CPUID feature word in EAX:
 *
 *   - "GenuineIntel" vendor: EDX from CPUID function 1.
 *   - "AuthenticAMD" vendor: EDX from CPUID function 0x80000001, provided
 *     CPUID function 0x80000000 returns a non-zero EAX.
 *   - Otherwise (no CPUID instruction, unrecognised vendor, or AMD
 *     without extended functions): zero, meaning no platform-specific
 *     optimizations can be used.
 *
 * EBX is saved and restored because CPUID overwrites it; EAX, ECX and
 * EDX are clobbered.  The toggled ID bit in EFLAGS is not restored.
 */
GLNAME( gl_identify_x86_cpu_features ):
	PUSH_L	( EBX )

	/* Test for the CPUID command.  If the ID Flag bit in EFLAGS
	 * (bit 21) is writable, the CPUID command is present.
	 */
	PUSHF_L
	POP_L	( EAX )			/* EAX = current EFLAGS */
	MOV_L	( EAX, ECX )		/* ECX = reference copy */
	XOR_L	( CONST(0x00200000), EAX )	/* flip the ID bit */
	PUSH_L	( EAX )
	POPF_L				/* try to write it back */
	PUSHF_L
	POP_L	( EAX )			/* EAX = EFLAGS after the write */

	/* Verify the ID Flag bit has been written.  If it has not, CPUID
	 * is unavailable and EAX still holds the EFLAGS image, so we must
	 * go through cpuid_failed to return zero rather than leaking
	 * EFLAGS bits to the caller as feature bits.
	 */
	CMP_L	( ECX, EAX )
	JZ	( LLBL ( cpuid_failed ) )

	/* Get the CPU vendor info.
	 */
	XOR_L	( EAX, EAX )
	CPUID

	/* Test for Intel processors.  We must look for the
	 * "GenuineIntel" string in EBX, EDX and ECX.
	 */
	CMP_L	( CONST(GENU), EBX )
	JNE	( LLBL( cpuid_amd ) )
	CMP_L	( CONST(INEI), EDX )
	JNE	( LLBL( cpuid_amd ) )
	CMP_L	( CONST(NTEL), ECX )
	JNE	( LLBL( cpuid_amd ) )

	/* We have an Intel processor, so we can get the feature
	 * information with a CPUID input value of 1.
	 */
	MOV_L	( CONST(0x1), EAX )
	CPUID
	MOV_L	( EDX, EAX )		/* return the feature flags */
	JMP	( LLBL( cpuid_done ) )

LLBL( cpuid_amd ):

	/* Test for AMD processors.  We must look for the
	 * "AuthenticAMD" string in EBX, EDX and ECX.  The registers still
	 * hold the vendor string from CPUID function 0 at this point.
	 */
	CMP_L	( CONST(AUTH), EBX )
	JNE	( LLBL( cpuid_other ) )
	CMP_L	( CONST(ENTI), EDX )
	JNE	( LLBL( cpuid_other ) )
	CMP_L	( CONST(CAMD), ECX )
	JNE	( LLBL( cpuid_other ) )

	/* We have an AMD processor, so we can get the feature
	 * information after we verify that the extended functions are
	 * supported.
	 */
	MOV_L	( CONST(0x80000000), EAX )
	CPUID
	TEST_L	( EAX, EAX )		/* zero => no extended functions */
	JZ	( LLBL ( cpuid_failed ) )

	MOV_L	( CONST(0x80000001), EAX )
	CPUID
	MOV_L	( EDX, EAX )		/* return the extended feature flags */
	JMP	( LLBL ( cpuid_done ) )

LLBL( cpuid_other ):

	/* Test for other processors here when required.
	 * Currently falls through to cpuid_failed.
	 */

LLBL( cpuid_failed ):

	/* If we can't determine the feature information, we must
	 * return zero to indicate that no platform-specific
	 * optimizations can be used.
	 */
	MOV_L	( CONST(0), EAX )

LLBL ( cpuid_done ):

	POP_L	( EBX )
	RET

View file

@ -0,0 +1,63 @@
/* $Id: common_x86_asm.h,v 1.2 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Check CPU capabilities & initialize optimized functions for this particular
* processor.
*
* Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
* Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the
* new Katmai functions
*
* Reimplemented by Gareth Hughes <gareth@valinux.com> in a more
* future-proof manner, based on code in the Linux kernel.
*/
#ifndef __COMMON_X86_ASM_H__
#define __COMMON_X86_ASM_H__
#include "common_x86_features.h"
#ifdef HAVE_CONFIG_H
#include "conf.h"
#endif
#ifdef USE_X86_ASM
#include "x86.h"
#ifdef USE_3DNOW_ASM
#include "3dnow.h"
#endif
#ifdef USE_KATMAI_ASM
#include "katmai.h"
#endif
#endif
/* Raw CPUID feature bits for the host CPU (defined in common_x86.c and
 * filled in by gl_init_all_x86_transform_asm()).  Test individual bits
 * with the cpu_has_* macros from common_x86_features.h.
 */
extern int gl_x86_cpu_features;
/* Detect the CPU features and install the optimized transform routines.
 * Must be called before gl_init_all_x86_vertex_asm().
 */
extern void gl_init_all_x86_transform_asm( void );
/* Install the optimized vertex routines.  Relies on gl_x86_cpu_features
 * having been initialized by gl_init_all_x86_transform_asm().
 */
extern void gl_init_all_x86_vertex_asm( void );
#endif

View file

@ -0,0 +1,77 @@
/* $Id: common_x86_features.h,v 1.2 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* x86 CPUID feature information. The raw data is returned by
* gl_identify_x86_cpu_features() and interpreted with the cpu_has_*
* helper macros.
*
* Gareth Hughes <gareth@valinux.com>
*/
#ifndef __COMMON_X86_FEATURES_H__
#define __COMMON_X86_FEATURES_H__
/* Capabilities of CPUs.
 *
 * One single-bit mask per bit of the raw feature word returned by
 * gl_identify_x86_cpu_features() (the EDX result of the CPUID feature
 * query).  The values are plain hex literals because this header is also
 * included from assembly source.
 * NOTE(review): the numbered names (X86_FEATURE_10, _18, ...) presumably
 * stand for bits without an assigned feature name, and the MMXEXT /
 * 3DNOWEXT / 3DNOW bits are presumably only meaningful in the AMD
 * extended feature word -- confirm against the vendor CPUID documentation.
 */
#define X86_FEATURE_FPU 0x00000001
#define X86_FEATURE_VME 0x00000002
#define X86_FEATURE_DE 0x00000004
#define X86_FEATURE_PSE 0x00000008
#define X86_FEATURE_TSC 0x00000010
#define X86_FEATURE_MSR 0x00000020
#define X86_FEATURE_PAE 0x00000040
#define X86_FEATURE_MCE 0x00000080
#define X86_FEATURE_CX8 0x00000100
#define X86_FEATURE_APIC 0x00000200
#define X86_FEATURE_10 0x00000400
#define X86_FEATURE_SEP 0x00000800
#define X86_FEATURE_MTRR 0x00001000
#define X86_FEATURE_PGE 0x00002000
#define X86_FEATURE_MCA 0x00004000
#define X86_FEATURE_CMOV 0x00008000
#define X86_FEATURE_PAT 0x00010000
#define X86_FEATURE_PSE36 0x00020000
#define X86_FEATURE_18 0x00040000
#define X86_FEATURE_19 0x00080000
#define X86_FEATURE_20 0x00100000
#define X86_FEATURE_21 0x00200000
#define X86_FEATURE_MMXEXT 0x00400000
#define X86_FEATURE_MMX 0x00800000
#define X86_FEATURE_FXSR 0x01000000
#define X86_FEATURE_XMM 0x02000000
#define X86_FEATURE_26 0x04000000
#define X86_FEATURE_27 0x08000000
#define X86_FEATURE_28 0x10000000
#define X86_FEATURE_29 0x20000000
#define X86_FEATURE_3DNOWEXT 0x40000000
#define X86_FEATURE_3DNOW 0x80000000
/* Feature tests.  Each macro reads the global gl_x86_cpu_features, which
 * must be declared at the point of use, and evaluates to the masked bit:
 * non-zero when the feature is present, but not necessarily 1.  Use the
 * result only as a truth value.
 */
#define cpu_has_mmx (gl_x86_cpu_features & X86_FEATURE_MMX)
#define cpu_has_xmm (gl_x86_cpu_features & X86_FEATURE_XMM)
#define cpu_has_3dnow (gl_x86_cpu_features & X86_FEATURE_3DNOW)
#endif

View file

@ -1,20 +1,21 @@
/* $Id: mmx.h,v 1.3 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.1
*
* Version: 3.5
*
* Copyright (C) 1999 Brian Paul All Rights Reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL

View file

@ -350,7 +350,3 @@ LLBL(GMBT_1):
MOV_L ( EBP, ESP )
POP_L ( EBP )
RET

View file

@ -1,21 +1,21 @@
/* $Id: x86.c,v 1.8 2000/06/27 22:10:01 brianp Exp $ */
/* $Id: x86.c,v 1.9 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.1
*
* Version: 3.5
*
* Copyright (C) 1999 Brian Paul All Rights Reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
@ -28,7 +28,6 @@
* Intel x86 assembly code by Josh Vanderhoof
*/
#include "glheader.h"
#include "context.h"
#include "types.h"
@ -36,89 +35,104 @@
#include "xform.h"
#include "x86.h"
#ifdef USE_X86_ASM
extern void _ASMAPI gl_v16_x86_cliptest_points4( GLfloat *first_vert,
GLfloat *last_vert,
GLubyte *or_mask,
GLubyte *and_mask,
GLubyte *clip_mask );
#ifdef DEBUG
#include "debug_xform.h"
#endif
#define XFORM_ARGS GLvector4f *to_vec, \
const GLfloat m[16], \
const GLvector4f *from_vec, \
const GLubyte *mask, \
const GLubyte flag
#define DECLARE_XFORM_GROUP( pfx, sz, masked ) \
extern void _ASMAPI gl_##pfx##_transform_points##sz##_general_##masked( XFORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_points##sz##_identity_##masked( XFORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_no_rot_##masked( XFORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_points##sz##_perspective_##masked( XFORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_##masked( XFORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_no_rot_##masked( XFORM_ARGS ); \
extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_##masked( XFORM_ARGS );
#define ASSIGN_XFORM_GROUP( pfx, cma, sz, masked ) \
gl_transform_tab[cma][sz][MATRIX_GENERAL] = \
gl_##pfx##_transform_points##sz##_general_##masked; \
gl_transform_tab[cma][sz][MATRIX_IDENTITY] = \
gl_##pfx##_transform_points##sz##_identity_##masked; \
gl_transform_tab[cma][sz][MATRIX_3D_NO_ROT] = \
gl_##pfx##_transform_points##sz##_3d_no_rot_##masked; \
gl_transform_tab[cma][sz][MATRIX_PERSPECTIVE] = \
gl_##pfx##_transform_points##sz##_perspective_##masked; \
gl_transform_tab[cma][sz][MATRIX_2D] = \
gl_##pfx##_transform_points##sz##_2d_##masked; \
gl_transform_tab[cma][sz][MATRIX_2D_NO_ROT] = \
gl_##pfx##_transform_points##sz##_2d_no_rot_##masked; \
gl_transform_tab[cma][sz][MATRIX_3D] = \
gl_##pfx##_transform_points##sz##_3d_##masked;
#ifdef USE_X86_ASM
/*
 * Prototypes for the x86 transform routines: one group of seven per
 * point size (2, 3, 4), in an unmasked ("raw") and a masked flavour.
 */
DECLARE_XFORM_GROUP( x86, 2, raw )
DECLARE_XFORM_GROUP( x86, 3, raw )
DECLARE_XFORM_GROUP( x86, 4, raw )
DECLARE_XFORM_GROUP( x86, 2, masked )
DECLARE_XFORM_GROUP( x86, 3, masked )
DECLARE_XFORM_GROUP( x86, 4, masked )

/*
 * Clip test for 4-component points.  Takes the clip-space vector, the
 * vector receiving projected points, the per-point clip mask array and
 * pointers to the accumulated OR / AND masks; returns a GLvector4f
 * pointer.
 */
extern GLvector4f * _ASMAPI gl_x86_cliptest_points4( GLvector4f *clip_vec,
                                                     GLvector4f *proj_vec,
                                                     GLubyte clipMask[],
                                                     GLubyte *orMask,
                                                     GLubyte *andMask );

/*
 * Clip test over the vertex range first_vert..last_vert, with pointers
 * to the OR / AND masks and the per-vertex clip mask.
 */
extern void _ASMAPI gl_v16_x86_cliptest_points4( GLfloat *first_vert,
                                                 GLfloat *last_vert,
                                                 GLubyte *or_mask,
                                                 GLubyte *and_mask,
                                                 GLubyte *clip_mask );

/*
 * General-matrix transform of `count` source points (read with a stride
 * of `src_stride`) by matrix `m` into `dest`.  Each parameter is listed
 * exactly once: a repeated parameter name in a prototype is a
 * redeclaration error.
 */
extern void _ASMAPI gl_v16_x86_general_xform( GLfloat *dest,
                                              const GLfloat *m,
                                              const GLfloat *src,
                                              GLuint src_stride,
                                              GLuint count );
#endif
/*
 * XFORM_ARGS: the parameter list shared by every prototype that
 * DECLARE_XFORM_GROUP emits -- destination vector, 16-float matrix,
 * source vector, mask bytes and a flag byte.
 * NOTE(review): DECLARE_XFORM_GROUP is already expanded above this
 * point; confirm XFORM_ARGS is also defined before that first use.
 */
#define XFORM_ARGS GLvector4f *to_vec, \
const GLfloat m[16], \
const GLvector4f *from_vec, \
const GLubyte *mask, \
const GLubyte flag
/*
 * DECLARE_XFORM_GROUP(pfx, vsize, masked)
 *
 * Emits extern prototypes for the seven routines
 *    gl_<pfx>_transform_points<vsize>_<kind>_<masked>
 * (<kind>: general, identity, 3d_no_rot, perspective, 2d, 2d_no_rot, 3d),
 * each taking XFORM_ARGS.  The expansion already ends with ';'.
 * NOTE(review): this macro is also defined earlier in this file with its
 * second parameter spelled `sz`; a redefinition with a different
 * parameter spelling is not an identical redefinition -- confirm which
 * copy is meant to stay.
 */
#define DECLARE_XFORM_GROUP(pfx, vsize, masked) \
extern void _ASMAPI gl_##pfx##_transform_points##vsize##_general_##masked(XFORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_points##vsize##_identity_##masked(XFORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_points##vsize##_3d_no_rot_##masked(XFORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_points##vsize##_perspective_##masked(XFORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_points##vsize##_2d_##masked(XFORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_points##vsize##_2d_no_rot_##masked(XFORM_ARGS); \
extern void _ASMAPI gl_##pfx##_transform_points##vsize##_3d_##masked(XFORM_ARGS);
/*
 * ASSIGN_XFORM_GROUP( pfx, cma, vsize, masked )
 *
 * Stores the seven routines
 *    gl_<pfx>_transform_points<vsize>_<kind>_<masked>
 * into gl_transform_tab[cma][vsize][MATRIX_<KIND>].  Expands to seven
 * ';'-terminated assignment statements with no do { } while (0) wrapper.
 * NOTE(review): this macro is also defined earlier in this file with its
 * third parameter spelled `sz`; confirm which copy is meant to stay.
 */
#define ASSIGN_XFORM_GROUP( pfx, cma, vsize, masked ) \
gl_transform_tab[cma][vsize][MATRIX_GENERAL] \
= gl_##pfx##_transform_points##vsize##_general_##masked; \
gl_transform_tab[cma][vsize][MATRIX_IDENTITY] \
= gl_##pfx##_transform_points##vsize##_identity_##masked; \
gl_transform_tab[cma][vsize][MATRIX_3D_NO_ROT] \
= gl_##pfx##_transform_points##vsize##_3d_no_rot_##masked; \
gl_transform_tab[cma][vsize][MATRIX_PERSPECTIVE] \
= gl_##pfx##_transform_points##vsize##_perspective_##masked; \
gl_transform_tab[cma][vsize][MATRIX_2D] \
= gl_##pfx##_transform_points##vsize##_2d_##masked; \
gl_transform_tab[cma][vsize][MATRIX_2D_NO_ROT] \
= gl_##pfx##_transform_points##vsize##_2d_no_rot_##masked; \
gl_transform_tab[cma][vsize][MATRIX_3D] \
= gl_##pfx##_transform_points##vsize##_3d_##masked;
#ifdef USE_X86_ASM
/*
 * Prototypes for the x86 transform routines (sizes 2, 3, 4; raw and
 * masked) and for the 4-component clip test.
 * NOTE(review): these declarations repeat the ones in the earlier
 * USE_X86_ASM section of this file; repeated compatible extern
 * declarations are harmless, but confirm whether both copies are wanted.
 */
DECLARE_XFORM_GROUP( x86, 2, raw )
DECLARE_XFORM_GROUP( x86, 3, raw )
DECLARE_XFORM_GROUP( x86, 4, raw )
DECLARE_XFORM_GROUP( x86, 2, masked )
DECLARE_XFORM_GROUP( x86, 3, masked )
DECLARE_XFORM_GROUP( x86, 4, masked )
extern GLvector4f * _ASMAPI gl_x86_cliptest_points4( GLvector4f *clip_vec,
GLvector4f *proj_vec,
GLubyte clipMask[],
GLubyte *orMask,
GLubyte *andMask );
#endif
/*
 * Install the x86 assembly transform and clip-test routines.
 *
 * When USE_X86_ASM is defined this fills gl_transform_tab[] for point
 * sizes 2, 3 and 4 -- the unmasked ("raw") routines under index 0 and the
 * masked routines under CULL_MASK_ACTIVE -- and points gl_clip_tab[4] at
 * the assembly clip test.  With DEBUG also defined it then runs
 * gl_test_all_transform_functions( "x86" ) once.  Without USE_X86_ASM
 * the function does nothing.
 */
void gl_init_x86_transform_asm( void )
{
#ifdef USE_X86_ASM
   ASSIGN_XFORM_GROUP( x86, 0, 2, raw );
   ASSIGN_XFORM_GROUP( x86, 0, 3, raw );
   ASSIGN_XFORM_GROUP( x86, 0, 4, raw );

   ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 2, masked );
   ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 3, masked );
   ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 4, masked );

   /* XXX this function has been found to cause FP overflow exceptions */
   gl_clip_tab[4] = gl_x86_cliptest_points4;

#ifdef DEBUG
   gl_test_all_transform_functions( "x86" );
#endif
#endif
}

/*
 * Previous name of gl_init_x86_transform_asm(), kept as a forwarding
 * wrapper so callers that still use the old spelling keep linking.
 */
void gl_init_x86_asm_transforms( void )
{
   gl_init_x86_transform_asm();
}
/*
 * Install the x86 assembly routines for the v16 vertex path.
 *
 * When USE_X86_ASM is defined, points gl_xform_points3_v16_general and
 * gl_cliptest_points4_v16 at their x86 implementations (each assigned
 * exactly once).  Without USE_X86_ASM the function does nothing.
 */
void gl_init_x86_vertex_asm( void )
{
#ifdef USE_X86_ASM
   gl_xform_points3_v16_general = gl_v16_x86_general_xform;
   gl_cliptest_points4_v16 = gl_v16_x86_cliptest_points4;

   /* Self-test hook for the v16 routines; left disabled. */
#if 0
   gl_test_all_vertex_functions( "x86" );
#endif
#endif
}

View file

@ -1,21 +1,21 @@
/* $Id: x86.h,v 1.1 1999/08/19 00:55:42 jtg Exp $ */
/* $Id: x86.h,v 1.2 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.1
*
* Version: 3.5
*
* Copyright (C) 1999 Brian Paul All Rights Reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
@ -28,10 +28,10 @@
* Intel x86 assembly code by Josh Vanderhoof
*/
#ifndef X86_H
#define X86_H

/* Installs the x86 assembly transform and clip-test routines. */
extern void gl_init_x86_transform_asm( void );

/* Previous spelling of gl_init_x86_transform_asm(); still declared for
 * callers that use the old name.
 */
extern void gl_init_x86_asm_transforms( void );

/* Installs the x86 assembly routines for the v16 vertex path. */
extern void gl_init_x86_vertex_asm( void );

#endif

248
src/mesa/x86/x86_cliptest.S Normal file
View file

@ -0,0 +1,248 @@
/* $Id: x86_cliptest.S,v 1.2 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "assyntax.h"
#include "clip_args.h"
/* 1065353216 == 0x3F800000, the IEEE-754 single-precision bit pattern
 * of 1.0; FP_ZERO is the all-zero pattern.
 */
#define FP_ONE 1065353216
#define FP_ZERO 0
/* Operand for the i-th 4-byte element relative to ESI / EDI / EDX.
 * The argument is not parenthesised in the expansion, so pass a single
 * literal or identifier, not an expression such as `n + 1`.
 */
#define SRC(i) REGOFF(i * 4, ESI)
#define DST(i) REGOFF(i * 4, EDI)
#define MAT(i) REGOFF(i * 4, EDX)
/*
 * Table for clip test.
 *
 * The clip test builds a 7-bit index from the sign bits and magnitude
 * comparisons of the four source components (S(n) is shorthand for
 * SRC(n)) and reads one byte from this table:
 *
 * bit6 = SRC(3) < 0
 * bit5 = SRC(2) < 0
 * bit4 = abs(S(2)) > abs(S(3))
 * bit3 = SRC(1) < 0
 * bit2 = abs(S(1)) > abs(S(3))
 * bit1 = SRC(0) < 0
 * bit0 = abs(S(0)) > abs(S(3))
 *
 * 7 index bits give 128 entries, laid out below as 16 rows of 8 bytes;
 * the row is selected by bits 6..3 and the column by bits 2..0.
 *
 * In the first eight rows (bit6 clear, SRC(3) >= 0) the stored byte has
 *    1 / 2   set when abs(S(0)) > abs(S(3)) and S(0) is >= 0 / < 0,
 *    4 / 8   likewise for S(1),
 *   32 / 16  likewise for S(2).
 * A sign bit without the matching magnitude bit contributes nothing.
 * The last eight rows (bit6 set, SRC(3) < 0) hold the values used for a
 * negative SRC(3).
 */
SEG_DATA
clip_table:
D_BYTE 0, 1, 0, 2, 4, 5, 4, 6
D_BYTE 0, 1, 0, 2, 8, 9, 8, 10
D_BYTE 32, 33, 32, 34, 36, 37, 36, 38
D_BYTE 32, 33, 32, 34, 40, 41, 40, 42
D_BYTE 0, 1, 0, 2, 4, 5, 4, 6
D_BYTE 0, 1, 0, 2, 8, 9, 8, 10
D_BYTE 16, 17, 16, 18, 20, 21, 20, 22
D_BYTE 16, 17, 16, 18, 24, 25, 24, 26
D_BYTE 63, 61, 63, 62, 55, 53, 55, 54
D_BYTE 63, 61, 63, 62, 59, 57, 59, 58
D_BYTE 47, 45, 47, 46, 39, 37, 39, 38
D_BYTE 47, 45, 47, 46, 43, 41, 43, 42
D_BYTE 63, 61, 63, 62, 55, 53, 55, 54
D_BYTE 63, 61, 63, 62, 59, 57, 59, 58
D_BYTE 31, 29, 31, 30, 23, 21, 23, 22
D_BYTE 31, 29, 31, 30, 27, 25, 27, 26
SEG_TEXT
/*
 * gl_x86_cliptest_points4
 *
 * For every source point: compute its clip mask byte via clip_table,
 * store it in clipmask[], fold it into the running OR / AND masks, and,
 * when the mask is zero, write the point multiplied by 1/SRC(3) (plus
 * 1/SRC(3) itself in slot 3) to the destination.  Returns the value of
 * the ARG_DEST argument in EAX.
 *
 * Register use:
 *
 * AL: ormask
 * AH: andmask
 * EBX: temp0
 * ECX: temp1
 * EDX: clipmask[]
 * ESI: clip[]
 * EDI: proj[]
 * EBP: temp2
 *
 * The ARG_* operands are not defined in this file (presumably they come
 * from clip_args.h, included above); they are ESP-relative and depend on
 * FRAME_OFFSET.
 */
/* Position-independent ELF builds fetch clip_table through the GOT. */
#if defined(__ELF__) && defined(__PIC__) && !defined(ELFPIC)
#define ELFPIC
#endif
ALIGNTEXT16
GLOBL GLNAME( gl_x86_cliptest_points4 )
GLNAME( gl_x86_cliptest_points4 ):
/* Bytes pushed below before any ARG_* is used: four saved registers
 * (16), plus the clip_table pointer (4 more) in the ELFPIC case.
 */
#ifdef ELFPIC
#define FRAME_OFFSET 20
#else
#define FRAME_OFFSET 16
#endif
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBP )
PUSH_L( EBX )
#ifdef ELFPIC
/* store pointer to clip_table on stack */
/* The CALL pushes the address of the following ADD; the helper copies
 * it into EBX and returns, after which the GOT offset is added and the
 * table address is loaded from its GOT slot.
 */
CALL( LLBL( ctp4_get_eip ) )
ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )
MOV_L( REGOFF(clip_table@GOT, EBX), EBX )
PUSH_L( EBX )
JMP( LLBL( ctp4_clip_table_ready ) )
LLBL( ctp4_get_eip ):
/* store eip in ebx */
MOV_L( REGIND(ESP), EBX )
RET
LLBL( ctp4_clip_table_ready ):
#endif
/* Load the arguments, then the stride, count and start pointer of the
 * source vector.
 */
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_CLIP, EDX )
MOV_L( ARG_OR, EBX )
MOV_L( ARG_AND, EBP )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
/* Update the destination vector's flags, size and count fields.
 * NOTE(review): the flags get VEC_SIZE_4 while the size field is set
 * to 3, and the loop below stores four values per point -- confirm the
 * size value is intended.
 */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( EAX, ARG_SOURCE ) /* put stride in ARG_SOURCE */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDX, ECX )
MOV_L( ECX, ARG_CLIP ) /* put clipmask + count in ARG_CLIP */
/* Start == end means count is zero: skip the loop.  The two byte moves
 * between the CMP and the JZ do not modify the flags.  On this path the
 * OR / AND mask bytes are not written back.
 */
CMP_L( ECX, EDX )
MOV_B( REGIND(EBX), AL )
MOV_B( REGIND(EBP), AH )
JZ( LLBL( ctp4_finish ) )
ALIGNTEXT16
LLBL( ctp4_top ):
/* Start 1.0 / SRC(3) on the FPU stack; the integer code below builds
 * the table index from the raw float bits in the meantime.
 */
FLD1 /* F3 */
FDIV_S( SRC(3) )
MOV_L( SRC(3), EBP )
MOV_L( SRC(2), EBX )
XOR_L( ECX, ECX )
/* Each ADD reg,reg shifts the sign bit out into the carry flag and
 * each ADC ECX,ECX shifts that carry into ECX, so ECX accumulates the
 * seven index bits from bit6 down to bit0.
 */
ADD_L( EBP, EBP ) /* ebp = abs(S(3))*2 ; carry = sign of S(3) */
ADC_L( ECX, ECX )
ADD_L( EBX, EBX ) /* ebx = abs(S(2))*2 ; carry = sign of S(2) */
ADC_L( ECX, ECX )
CMP_L( EBX, EBP ) /* carry = abs(S(2))*2 > abs(S(3))*2 */
ADC_L( ECX, ECX )
MOV_L( SRC(1), EBX )
ADD_L( EBX, EBX ) /* ebx = abs(S(1))*2 ; carry = sign of S(1) */
ADC_L( ECX, ECX )
CMP_L( EBX, EBP ) /* carry = abs(S(1))*2 > abs(S(3))*2 */
ADC_L( ECX, ECX )
MOV_L( SRC(0), EBX )
ADD_L( EBX, EBX ) /* ebx = abs(S(0))*2 ; carry = sign of S(0) */
ADC_L( ECX, ECX )
CMP_L( EBX, EBP ) /* carry = abs(S(0))*2 > abs(S(3))*2 */
ADC_L( ECX, ECX )
/* CL = clip_table[index] */
#ifdef ELFPIC
MOV_L( REGIND(ESP), EBP ) /* clip_table */
MOV_B( REGBI(EBP, ECX), CL )
#else
MOV_B( REGOFF(clip_table,ECX), CL )
#endif
/* Fold the mask into the running OR / AND masks and store it in
 * clipmask[]; the MOV_B leaves the flags set by TEST_B intact for the JZ.
 */
OR_B( CL, AL )
AND_B( CL, AH )
TEST_B( CL, CL )
MOV_B( CL, REGIND(EDX) )
JZ( LLBL( ctp4_proj ) )
/* Non-zero mask: discard 1/SRC(3) and leave the destination untouched. */
FSTP( ST(0) ) /* */
JMP( LLBL( ctp4_next ) )
LLBL( ctp4_proj ):
/* Zero mask: scale SRC(0..2) by 1/SRC(3), store them in DST(0..2), and
 * store 1/SRC(3) itself in DST(3).
 */
FLD_S( SRC(0) ) /* F0 F3 */
FMUL2( ST(1), ST(0) )
FLD_S( SRC(1) ) /* F1 F0 F3 */
FMUL2( ST(2), ST(0) )
FLD_S( SRC(2) ) /* F2 F1 F0 F3 */
FMUL2( ST(3), ST(0) )
FXCH( ST(2) ) /* F0 F1 F2 F3 */
FSTP_S( DST(0) ) /* F1 F2 F3 */
FSTP_S( DST(1) ) /* F2 F3 */
FSTP_S( DST(2) ) /* F3 */
FSTP_S( DST(3) ) /* */
LLBL( ctp4_next ):
/* Advance: one clip mask byte, 16 bytes of destination, and the source
 * stride (stashed in ARG_SOURCE above); loop until the clip mask
 * pointer reaches the end pointer stashed in ARG_CLIP.
 */
INC_L( EDX )
ADD_L( CONST(16), EDI )
ADD_L( ARG_SOURCE, ESI )
CMP_L( EDX, ARG_CLIP )
JNZ( LLBL( ctp4_top ) )
/* Write the accumulated OR / AND masks back through their pointers. */
MOV_L( ARG_OR, ECX )
MOV_L( ARG_AND, EDX )
MOV_B( AL, REGIND(ECX) )
MOV_B( AH, REGIND(EDX) )
LLBL( ctp4_finish ):
MOV_L( ARG_DEST, EAX )
#ifdef ELFPIC
/* ESI is only a scratch target here; its saved value is restored by
 * the final POP below.
 */
POP_L( ESI ) /* discard ptr to clip_table */
#endif
POP_L( EBX )
POP_L( EBP )
POP_L( EDI )
POP_L( ESI )
RET

74
src/mesa/x86/xform_args.h Normal file
View file

@ -0,0 +1,74 @@
/* $Id: xform_args.h,v 1.2 2000/10/23 00:16:29 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Transform function interface for assembly code. Simply define
* FRAME_OFFSET to the number of bytes pushed onto the stack before
* using the ARG_* argument macros.
*
* Gareth Hughes <gareth@valinux.com>
*/
#ifndef __XFORM_ARGS_H__
#define __XFORM_ARGS_H__
/* Offsets into GLvector4f
 *
 * Byte offsets of six consecutive fields, 4 bytes apart.
 * NOTE(review): these must match the C layout of GLvector4f, which is
 * not visible here -- presumably 4-byte pointers and integers; confirm.
 */
#define V4F_DATA 0
#define V4F_START 4
#define V4F_COUNT 8
#define V4F_STRIDE 12
#define V4F_SIZE 16
#define V4F_FLAGS 20
/* GLvector4f flags
 *
 * VEC_SIZE_n has the low n bits set.
 */
#define VEC_SIZE_1 1
#define VEC_SIZE_2 3
#define VEC_SIZE_3 7
#define VEC_SIZE_4 15
/* Offsets for transform_func arguments
 *
 * typedef void (*transform_func)( GLvector4f *to_vec,
 * const GLfloat m[16],
 * const GLvector4f *from_vec,
 * const GLubyte *clipmask,
 * const GLubyte flag );
 *
 * One 4-byte slot per argument in declaration order.  The first argument
 * is at offset 4, not 0 (presumably offset 0 holds the return address on
 * entry -- confirm against the calling convention in use).
 */
#define OFFSET_DEST 4
#define OFFSET_MATRIX 8
#define OFFSET_SOURCE 12
#define OFFSET_CLIP 16
#define OFFSET_FLAG 20
/* ESP-relative operands for the arguments.  FRAME_OFFSET must be defined
 * by the including file as the number of bytes it has pushed since
 * entry; REGOFF and ESP are not defined in this header and must be
 * supplied by the includer.
 */
#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
#define ARG_MATRIX REGOFF(FRAME_OFFSET+OFFSET_MATRIX, ESP)
#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP)
#define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP)
#define ARG_FLAG REGOFF(FRAME_OFFSET+OFFSET_FLAG, ESP)
#endif