draw: subclass vertex shaders according to execution method

Create new files for shaders compiled/executed with LLVM, SSE and the
interpreter (exec), respectively.
This commit is contained in:
Keith Whitwell 2008-02-15 13:37:01 +00:00
parent e822e09b89
commit b29d8d2729
8 changed files with 766 additions and 232 deletions

View file

@ -1,2 +1,2 @@
default:
cd .. ; make
cd ../../../mesa ; make

View file

@ -128,13 +128,25 @@ struct draw_stage
* Private version of the compiled vertex_shader
*/
struct draw_vertex_shader {
/* This member will disappear shortly:
*/
const struct pipe_shader_state *state;
#if defined(__i386__) || defined(__386__)
/* Generated x86 code for this shader; only present on x86 builds. */
struct x86_function sse2_program;
#endif
#ifdef MESA_LLVM
/* Compiled LLVM program for this shader; only present with MESA_LLVM. */
struct gallivm_prog *llvm_prog;
#endif
/* Per-implementation setup hook.  draw_vertex_shader_queue_flush() and
* draw_bind_vertex_shader() both call it before the shader is run.
*/
void (*prepare)( struct draw_vertex_shader *shader,
struct draw_context *draw );
/* Run the shader - this interface will get cleaned up in the
* future:
*/
void (*run)( struct draw_vertex_shader *shader,
struct draw_context *draw,
const unsigned *elts,
unsigned count,
struct vertex_header *vOut[] );
/* Destroy the shader; draw_delete_vertex_shader() forwards to this hook. */
void (*delete)( struct draw_vertex_shader * );
};
@ -176,7 +188,7 @@ struct draw_context
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX];
struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX];
const struct draw_vertex_shader *vertex_shader;
struct draw_vertex_shader *vertex_shader;
uint num_vs_outputs; /**< convenience, from vertex_shader */
@ -201,6 +213,7 @@ struct draw_context
boolean convert_wide_points; /**< convert wide points to tris? */
boolean convert_wide_lines; /**< convert side lines to tris? */
boolean use_sse;
unsigned reduced_prim;
@ -255,11 +268,10 @@ struct draw_context
unsigned queue_nr;
} pq;
int use_sse : 1;
#ifdef MESA_LLVM
struct gallivm_cpu_engine *engine;
#endif
/* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private.
*/
struct gallivm_cpu_engine *engine;
void *driver_private;
};
@ -290,11 +302,7 @@ extern void draw_vertex_cache_invalidate( struct draw_context *draw );
extern void draw_vertex_cache_unreference( struct draw_context *draw );
extern void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw );
extern void draw_vertex_shader_queue_flush( struct draw_context *draw );
#ifdef MESA_LLVM
extern void draw_vertex_shader_queue_flush_llvm( struct draw_context *draw );
#endif
struct tgsi_exec_machine;

View file

@ -33,177 +33,10 @@
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#if defined(__i386__) || defined(__386__)
#include "tgsi/exec/tgsi_sse2.h"
#endif
#include "draw_private.h"
#include "draw_context.h"
#include "draw_vs.h"
#include "x86/rtasm/x86sse.h"
#include "llvm/gallivm.h"
#define DBG_VS 0
/**
 * Build the clip mask for one clip-space position.
 *
 * \param clip   clip-space position; components [0..3] are read
 * \param plane  array of plane equations; entries 6..nr-1 are tested with dot4()
 * \param nr     total number of planes, counting the six hardwired ones
 * \return bitmask with one bit set for every plane test that came out negative
 */
static INLINE unsigned
compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
{
   unsigned mask = 0;
   unsigned i;

   /* The six hardwired planes are tested directly against clip[3]. */
   if (-clip[0] + clip[3] < 0) {
      mask |= CLIP_RIGHT_BIT;
   }
   if ( clip[0] + clip[3] < 0) {
      mask |= CLIP_LEFT_BIT;
   }
   if (-clip[1] + clip[3] < 0) {
      mask |= CLIP_TOP_BIT;
   }
   if ( clip[1] + clip[3] < 0) {
      mask |= CLIP_BOTTOM_BIT;
   }
   if (-clip[2] + clip[3] < 0) {
      mask |= CLIP_FAR_BIT;
   }
   if ( clip[2] + clip[3] < 0) {
      mask |= CLIP_NEAR_BIT;
   }

   /* Any further planes come from plane[]; plane i owns bit i.  The shift is
    * done on an unsigned one so the bit never passes through a signed int.
    */
   for (i = 6; i < nr; i++) {
      if (dot4(clip, plane[i]) < 0) {
         mask |= (1u << i);
      }
   }

   return mask;
}
/* Signature of a generated shader entry point.  The pointer returned by
* x86_get_func() is cast to this type and called with the exec machine's
* Inputs, Outputs, Consts and Temps, in that order.
*/
typedef void (XSTDCALL *codegen_function) (
const struct tgsi_exec_vector *input,
struct tgsi_exec_vector *output,
float (*constant)[4],
struct tgsi_exec_vector *temporary );
/**
* Transform vertices with the current vertex program/shader.
* Up to four vertices can be shaded at a time.
* \param draw the draw context; supplies the exec machine, the fetch
*             function, the viewport, the clip planes and the bound shader
* \param elts indexes of four input vertices
* \param count number of vertices to shade [1..4]
* \param vOut array of pointers to four output vertices
*/
static void
run_vertex_program(struct draw_context *draw,
unsigned elts[4], unsigned count,
struct vertex_header *vOut[])
{
struct tgsi_exec_machine *machine = &draw->machine;
unsigned int j;
ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);
const float *scale = draw->viewport.scale;
const float *trans = draw->viewport.translate;
assert(count <= 4);
/* Output 0 is treated as the position below, so it must be declared as such. */
assert(draw->vertex_shader->state->output_semantic_name[0]
== TGSI_SEMANTIC_POSITION);
/* Consts does not require 16 byte alignment. */
machine->Consts = (float (*)[4]) draw->user.constants;
machine->Inputs = ALIGN16_ASSIGN(inputs);
machine->Outputs = ALIGN16_ASSIGN(outputs);
/* Load the input attributes of the 'count' vertices into the machine. */
draw->vertex_fetch.fetch_func( draw, machine, elts, count );
/* run shader */
/* The preprocessor selects what precedes the interpreter block below:
* - MESA_LLVM: "if (1) { ... } else", so the LLVM program always runs and
*   the interpreter block becomes the never-taken else branch;
* - x86 without MESA_LLVM: the SSE2 function is used when draw->use_sse is
*   set and x86_get_func() returns non-NULL, otherwise the interpreter runs;
* - anything else: only the interpreter block is compiled.
*/
#ifdef MESA_LLVM
if (1) {
struct gallivm_prog *prog = draw->vertex_shader->llvm_prog;
gallivm_cpu_vs_exec(prog,
machine->Inputs,
machine->Outputs,
machine->Consts,
machine->Temps);
} else
#elif defined(__i386__) || defined(__386__)
if (draw->use_sse) {
/* SSE */
/* cast away const */
struct draw_vertex_shader *shader
= (struct draw_vertex_shader *)draw->vertex_shader;
codegen_function func
= (codegen_function) x86_get_func( &shader->sse2_program );
if (func)
func(
machine->Inputs,
machine->Outputs,
machine->Consts,
machine->Temps );
else
/* interpreter */
tgsi_exec_machine_run( machine );
}
else
#endif
{
/* interpreter */
tgsi_exec_machine_run( machine );
}
/* store machine results */
for (j = 0; j < count; j++) {
unsigned slot;
float x, y, z, w;
/* Handle attr[0] (position) specially:
*
* XXX: Computing the clipmask should be done in the vertex
* program as a set of DP4 instructions appended to the
* user-provided code.
*/
x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes);
vOut[j]->edgeflag = 1;
/* divide by w */
/* From here on w holds the reciprocal; it is what gets stored in data[0][3]. */
w = 1.0f / w;
x *= w;
y *= w;
z *= w;
/* Viewport mapping */
vOut[j]->data[0][0] = x * scale[0] + trans[0];
vOut[j]->data[0][1] = y * scale[1] + trans[1];
vOut[j]->data[0][2] = z * scale[2] + trans[2];
vOut[j]->data[0][3] = w;
#if DBG_VS
debug_printf("output[%d]win: %f %f %f %f\n", j,
vOut[j]->data[0][0],
vOut[j]->data[0][1],
vOut[j]->data[0][2],
vOut[j]->data[0][3]);
#endif
/* Remaining attributes are packed into sequential post-transform
* vertex attrib slots.
*/
for (slot = 1; slot < draw->num_vs_outputs; slot++) {
vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
#if DBG_VS
debug_printf("output[%d][%d]: %f %f %f %f\n", j, slot,
vOut[j]->data[slot][0],
vOut[j]->data[slot][1],
vOut[j]->data[slot][2],
vOut[j]->data[slot][3]);
#endif
}
} /* loop over vertices */
}
/**
@ -213,13 +46,14 @@ run_vertex_program(struct draw_context *draw,
void
draw_vertex_shader_queue_flush(struct draw_context *draw)
{
struct draw_vertex_shader *shader = draw->vertex_shader;
unsigned i;
assert(draw->vs.queue_nr != 0);
/* XXX: do this on statechange:
*/
draw_update_vertex_fetch( draw );
shader->prepare( shader, draw );
// fprintf(stderr, " q(%d) ", draw->vs.queue_nr );
@ -242,7 +76,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw)
assert(n > 0);
assert(n <= 4);
run_vertex_program(draw, elts, n, dests);
shader->run(shader, draw, elts, n, dests);
}
draw->vs.queue_nr = 0;
@ -255,43 +89,16 @@ draw_create_vertex_shader(struct draw_context *draw,
{
struct draw_vertex_shader *vs;
vs = CALLOC_STRUCT( draw_vertex_shader );
if (vs == NULL) {
return NULL;
}
vs = draw_create_vs_llvm( draw, shader );
if (vs)
return vs;
vs->state = shader;
#ifdef MESA_LLVM
struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS);
gallivm_ir_set_layout(ir, GALLIVM_SOA);
gallivm_ir_set_components(ir, 4);
gallivm_ir_fill_from_tgsi(ir, shader->tokens);
vs->llvm_prog = gallivm_ir_compile(ir);
gallivm_ir_delete(ir);
draw->engine = gallivm_global_cpu_engine();
if (!draw->engine) {
draw->engine = gallivm_cpu_engine_create(vs->llvm_prog);
}
else {
gallivm_cpu_jit_compile(draw->engine, vs->llvm_prog);
}
#elif defined(__i386__) || defined(__386__)
if (draw->use_sse) {
/* cast-away const */
struct pipe_shader_state *sh = (struct pipe_shader_state *) shader;
x86_init_func( &vs->sse2_program );
if (!tgsi_emit_sse2( (struct tgsi_token *) sh->tokens,
&vs->sse2_program )) {
x86_release_func( (struct x86_function *) &vs->sse2_program );
fprintf(stdout /*err*/,
"tgsi_emit_sse2() failed, falling back to interpreter\n");
}
}
#endif
vs = draw_create_vs_sse( draw, shader );
if (vs)
return vs;
vs = draw_create_vs_exec( draw, shader );
assert(vs);
return vs;
}
@ -307,11 +114,7 @@ draw_bind_vertex_shader(struct draw_context *draw,
tgsi_exec_machine_init(&draw->machine);
/* specify the vertex program to interpret/execute */
tgsi_exec_machine_bind_shader(&draw->machine,
draw->vertex_shader->state->tokens,
PIPE_MAX_SAMPLERS,
NULL /*samplers*/ );
dvs->prepare( dvs, draw );
}
@ -319,9 +122,5 @@ void
draw_delete_vertex_shader(struct draw_context *draw,
struct draw_vertex_shader *dvs)
{
#if defined(__i386__) || defined(__386__)
x86_release_func( (struct x86_function *) &dvs->sse2_program );
#endif
FREE( dvs );
dvs->delete( dvs );
}

View file

@ -0,0 +1,50 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/* Authors: Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef DRAW_VS_H
#define DRAW_VS_H
struct draw_vertex_shader;
struct draw_context;
struct pipe_shader_state;
/* Create a vertex shader that is run by the TGSI interpreter.
* Returns NULL if the allocation fails.
*/
struct draw_vertex_shader *
draw_create_vs_exec(struct draw_context *draw,
const struct pipe_shader_state *templ);
/* Create a vertex shader that is run as generated SSE2 code.
* Returns NULL on non-x86 builds, when draw->use_sse is not set, when the
* allocation fails or when tgsi_emit_sse2() fails.
*/
struct draw_vertex_shader *
draw_create_vs_sse(struct draw_context *draw,
const struct pipe_shader_state *templ);
/* Create a vertex shader that is compiled and run through gallivm.
* Returns NULL when built without MESA_LLVM or when the allocation fails.
*/
struct draw_vertex_shader *
draw_create_vs_llvm(struct draw_context *draw,
const struct pipe_shader_state *templ);
#endif

View file

@ -0,0 +1,186 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
* Brian Paul
*/
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "draw_private.h"
#include "draw_context.h"
#include "draw_vs.h"
/**
 * Build the clip mask for one clip-space position.
 *
 * \param clip   clip-space position; components [0..3] are read
 * \param plane  array of plane equations; entries 6..nr-1 are tested with dot4()
 * \param nr     total number of planes, counting the six hardwired ones
 * \return bitmask with one bit set for every plane test that came out negative
 */
static INLINE unsigned
compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
{
   unsigned mask = 0;
   unsigned i;

   /* The six hardwired planes are tested directly against clip[3]. */
   if (-clip[0] + clip[3] < 0) {
      mask |= CLIP_RIGHT_BIT;
   }
   if ( clip[0] + clip[3] < 0) {
      mask |= CLIP_LEFT_BIT;
   }
   if (-clip[1] + clip[3] < 0) {
      mask |= CLIP_TOP_BIT;
   }
   if ( clip[1] + clip[3] < 0) {
      mask |= CLIP_BOTTOM_BIT;
   }
   if (-clip[2] + clip[3] < 0) {
      mask |= CLIP_FAR_BIT;
   }
   if ( clip[2] + clip[3] < 0) {
      mask |= CLIP_NEAR_BIT;
   }

   /* Any further planes come from plane[]; plane i owns bit i.  The shift is
    * done on an unsigned one so the bit never passes through a signed int.
    */
   for (i = 6; i < nr; i++) {
      if (dot4(clip, plane[i]) < 0) {
         mask |= (1u << i);
      }
   }

   return mask;
}
/**
 * Get the interpreter ready for this shader: bind the shader's tokens to
 * the draw context's exec machine, then refresh the vertex fetch state.
 *
 * \param shader  shader whose state->tokens get bound
 * \param draw    draw context owning the exec machine
 */
static void
vs_exec_prepare( struct draw_vertex_shader *shader,
                 struct draw_context *draw )
{
   struct tgsi_exec_machine *machine = &draw->machine;

   /* No samplers are handed to the machine for vertex shading. */
   tgsi_exec_machine_bind_shader( machine,
                                  shader->state->tokens,
                                  PIPE_MAX_SAMPLERS,
                                  NULL );

   draw_update_vertex_fetch( draw );
}
/**
 * Shade a batch of at most four vertices with the TGSI interpreter and
 * store the results in the output vertices.
 *
 * \param shader  the shader being run (not referenced here; the bound
 *                shader is reached through draw->vertex_shader)
 * \param draw    draw context supplying the exec machine, fetch function,
 *                viewport and clip planes
 * \param elts    indexes of the input vertices
 * \param count   number of vertices to shade, at most 4
 * \param vOut    array of pointers to the output vertices
 */
static void
vs_exec_run( struct draw_vertex_shader *shader,
             struct draw_context *draw,
             const unsigned *elts,
             unsigned count,
             struct vertex_header *vOut[] )
{
   struct tgsi_exec_machine *machine = &draw->machine;
   const float *scale = draw->viewport.scale;
   const float *trans = draw->viewport.translate;
   unsigned vert;
   ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
   ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);

   assert(count <= 4);
   assert(draw->vertex_shader->state->output_semantic_name[0]
          == TGSI_SEMANTIC_POSITION);

   /* Point the machine at the constants and at the on-stack I/O vectors. */
   machine->Consts = (float (*)[4]) draw->user.constants;
   machine->Inputs = ALIGN16_ASSIGN(inputs);
   machine->Outputs = ALIGN16_ASSIGN(outputs);

   draw->vertex_fetch.fetch_func( draw, machine, elts, count );

   /* Interpret the bound shader. */
   tgsi_exec_machine_run( machine );

   /* Copy the machine outputs into the vertex headers, one vertex at a time. */
   for (vert = 0; vert < count; vert++) {
      struct vertex_header *out = vOut[vert];
      unsigned attr, chan;
      float x, y, z, rhw;

      /* Output 0 is the position: keep the clip-space value as is.
       *
       * XXX: the clipmask would better be computed by DP4 instructions
       * appended to the user's program.
       */
      for (chan = 0; chan < 4; chan++) {
         out->clip[chan] = machine->Outputs[0].xyzw[chan].f[vert];
      }

      out->clipmask = compute_clipmask(out->clip, draw->plane, draw->nr_planes);
      out->edgeflag = 1;

      /* Perspective divide. */
      rhw = 1.0f / out->clip[3];
      x = out->clip[0] * rhw;
      y = out->clip[1] * rhw;
      z = out->clip[2] * rhw;

      /* Viewport transform; the fourth component carries 1/w. */
      out->data[0][0] = x * scale[0] + trans[0];
      out->data[0][1] = y * scale[1] + trans[1];
      out->data[0][2] = z * scale[2] + trans[2];
      out->data[0][3] = rhw;

      /* The other outputs go unchanged into the following attrib slots. */
      for (attr = 1; attr < draw->num_vs_outputs; attr++) {
         for (chan = 0; chan < 4; chan++) {
            out->data[attr][chan] = machine->Outputs[attr].xyzw[chan].f[vert];
         }
      }
   }
}
/**
 * Destroy an interpreter-backed vertex shader.
 *
 * \param dvs  shader to free; only the struct itself is released
 */
static void
vs_exec_delete( struct draw_vertex_shader *dvs )
{
   FREE( dvs );
}
/**
 * Create a vertex shader that is run by the TGSI interpreter.
 *
 * \param draw   draw context (not referenced here)
 * \param state  shader state; the pointer is stored, not copied
 * \return the new shader, or NULL if the allocation fails
 */
struct draw_vertex_shader *
draw_create_vs_exec(struct draw_context *draw,
                    const struct pipe_shader_state *state)
{
   struct draw_vertex_shader *vs = CALLOC_STRUCT( draw_vertex_shader );

   if (vs != NULL) {
      vs->state = state;
      vs->prepare = vs_exec_prepare;
      vs->run = vs_exec_run;
      vs->delete = vs_exec_delete;
   }

   return vs;
}

View file

@ -0,0 +1,237 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Zack Rusin
* Keith Whitwell <keith@tungstengraphics.com>
* Brian Paul
*/
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "draw_private.h"
#include "draw_context.h"
#include "draw_vs.h"
#ifdef MESA_LLVM
#include "llvm/gallivm.h"
/* LLVM flavour of draw_vertex_shader. */
struct draw_llvm_vertex_shader {
/* Must stay the first member: vs_llvm_run() and vs_llvm_delete() cast the
* base pointer back to this struct.
*/
struct draw_vertex_shader base;
/* Program produced by gallivm_ir_compile() in draw_create_vs_llvm(). */
struct gallivm_prog *llvm_prog;
};
/**
 * Build the clip mask for one clip-space position.
 *
 * \param clip   clip-space position; components [0..3] are read
 * \param plane  array of plane equations; entries 6..nr-1 are tested with dot4()
 * \param nr     total number of planes, counting the six hardwired ones
 * \return bitmask with one bit set for every plane test that came out negative
 */
static INLINE unsigned
compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
{
   unsigned mask = 0;
   unsigned i;

   /* The six hardwired planes are tested directly against clip[3]. */
   if (-clip[0] + clip[3] < 0) {
      mask |= CLIP_RIGHT_BIT;
   }
   if ( clip[0] + clip[3] < 0) {
      mask |= CLIP_LEFT_BIT;
   }
   if (-clip[1] + clip[3] < 0) {
      mask |= CLIP_TOP_BIT;
   }
   if ( clip[1] + clip[3] < 0) {
      mask |= CLIP_BOTTOM_BIT;
   }
   if (-clip[2] + clip[3] < 0) {
      mask |= CLIP_FAR_BIT;
   }
   if ( clip[2] + clip[3] < 0) {
      mask |= CLIP_NEAR_BIT;
   }

   /* Any further planes come from plane[]; plane i owns bit i.  The shift is
    * done on an unsigned one so the bit never passes through a signed int.
    */
   for (i = 6; i < nr; i++) {
      if (dot4(clip, plane[i]) < 0) {
         mask |= (1u << i);
      }
   }

   return mask;
}
/**
 * Prepare hook of the LLVM shader: only the vertex fetch state is refreshed.
 *
 * \param base  the shader (not referenced here)
 * \param draw  draw context whose fetch state is updated
 */
static void
vs_llvm_prepare( struct draw_vertex_shader *base,
                 struct draw_context *draw )
{
   draw_update_vertex_fetch( draw );
}
/**
 * Shade a batch of at most four vertices with the LLVM-compiled program
 * and store the results in the output vertices.
 *
 * \param base   the shader; must have been created by draw_create_vs_llvm()
 * \param draw   draw context supplying the exec machine, fetch function,
 *               viewport and clip planes
 * \param elts   indexes of the input vertices
 * \param count  number of vertices to shade, at most 4
 * \param vOut   array of pointers to the output vertices
 */
static void
vs_llvm_run( struct draw_vertex_shader *base,
             struct draw_context *draw,
             const unsigned *elts,
             unsigned count,
             struct vertex_header *vOut[] )
{
   struct draw_llvm_vertex_shader *llvm_vs =
      (struct draw_llvm_vertex_shader *)base;
   struct tgsi_exec_machine *machine = &draw->machine;
   const float *scale = draw->viewport.scale;
   const float *trans = draw->viewport.translate;
   unsigned vert;
   ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
   ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);

   assert(count <= 4);
   assert(draw->vertex_shader->state->output_semantic_name[0]
          == TGSI_SEMANTIC_POSITION);

   /* Only Inputs/Outputs get 16-byte aligned storage; Consts is used as is. */
   machine->Consts = (float (*)[4]) draw->user.constants;
   machine->Inputs = ALIGN16_ASSIGN(inputs);
   machine->Outputs = ALIGN16_ASSIGN(outputs);

   draw->vertex_fetch.fetch_func( draw, machine, elts, count );

   /* Execute the compiled program on the machine's buffers. */
   gallivm_cpu_vs_exec(llvm_vs->llvm_prog,
                       machine->Inputs,
                       machine->Outputs,
                       machine->Consts,
                       machine->Temps);

   /* Copy the machine outputs into the vertex headers, one vertex at a time. */
   for (vert = 0; vert < count; vert++) {
      struct vertex_header *out = vOut[vert];
      unsigned attr, chan;
      float x, y, z, rhw;

      /* Output 0 is the position: keep the clip-space value as is. */
      for (chan = 0; chan < 4; chan++) {
         out->clip[chan] = machine->Outputs[0].xyzw[chan].f[vert];
      }

      out->clipmask = compute_clipmask(out->clip, draw->plane, draw->nr_planes);
      out->edgeflag = 1;

      /* Perspective divide. */
      rhw = 1.0f / out->clip[3];
      x = out->clip[0] * rhw;
      y = out->clip[1] * rhw;
      z = out->clip[2] * rhw;

      /* Viewport transform; the fourth component carries 1/w. */
      out->data[0][0] = x * scale[0] + trans[0];
      out->data[0][1] = y * scale[1] + trans[1];
      out->data[0][2] = z * scale[2] + trans[2];
      out->data[0][3] = rhw;

      /* The other outputs go unchanged into the following attrib slots. */
      for (attr = 1; attr < draw->num_vs_outputs; attr++) {
         for (chan = 0; chan < 4; chan++) {
            out->data[attr][chan] = machine->Outputs[attr].xyzw[chan].f[vert];
         }
      }
   }
}
/**
 * Destroy an LLVM-backed vertex shader.
 *
 * \param base  shader created by draw_create_vs_llvm()
 */
static void
vs_llvm_delete( struct draw_vertex_shader *base )
{
   /* XXX: the compiled program in llvm_prog still needs to be released
    * somehow; for now only the wrapper struct is freed.
    */
   FREE( (struct draw_llvm_vertex_shader *)base );
}
/**
 * Create a vertex shader that is compiled and run through gallivm.
 *
 * \param draw   draw context; its engine pointer is (re)assigned here
 * \param templ  shader state whose tokens are translated; the pointer is stored
 * \return the new shader, or NULL if allocation or compilation fails, in
 *         which case the caller can fall back to another implementation
 */
struct draw_vertex_shader *
draw_create_vs_llvm(struct draw_context *draw,
                    const struct pipe_shader_state *templ)
{
   struct draw_llvm_vertex_shader *vs;
   struct gallivm_ir *ir;

   vs = CALLOC_STRUCT( draw_llvm_vertex_shader );
   if (vs == NULL) {
      return NULL;
   }

   vs->base.state = templ;
   vs->base.prepare = vs_llvm_prepare;
   vs->base.run = vs_llvm_run;
   vs->base.delete = vs_llvm_delete;

   /* NOTE(review): the NULL checks below assume gallivm reports failure by
    * returning NULL - confirm against the gallivm API.
    */
   ir = gallivm_ir_new(GALLIVM_VS);
   if (ir == NULL) {
      goto fail;
   }

   gallivm_ir_set_layout(ir, GALLIVM_SOA);
   gallivm_ir_set_components(ir, 4);
   gallivm_ir_fill_from_tgsi(ir, vs->base.state->tokens);
   vs->llvm_prog = gallivm_ir_compile(ir);
   gallivm_ir_delete(ir);

   /* vs_llvm_run() hands llvm_prog straight to gallivm_cpu_vs_exec(), so a
    * shader without a program must never be returned.
    */
   if (vs->llvm_prog == NULL) {
      goto fail;
   }

   draw->engine = gallivm_global_cpu_engine();

   /* XXX: Why are there two versions of this?  Shouldn't creating the
    * engine be a separate operation to compiling a shader?
    */
   if (!draw->engine) {
      draw->engine = gallivm_cpu_engine_create(vs->llvm_prog);
   }
   else {
      gallivm_cpu_jit_compile(draw->engine, vs->llvm_prog);
   }

   return &vs->base;

fail:
   FREE( vs );
   return NULL;
}
#else
/**
 * Stand-in used when MESA_LLVM is not defined: no LLVM shader can be
 * created, so the caller always gets NULL.
 */
struct draw_vertex_shader *
draw_create_vs_llvm(struct draw_context *draw,
                    const struct pipe_shader_state *shader)
{
   (void) draw;
   (void) shader;

   return NULL;
}
#endif

View file

@ -0,0 +1,251 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
* Brian Paul
*/
#include "draw_vs.h"
#if defined(__i386__) || defined(__386__)
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "draw_private.h"
#include "draw_context.h"
#include "x86/rtasm/x86sse.h"
#include "tgsi/exec/tgsi_sse2.h"
/* Signature of a generated shader entry point.  draw_create_vs_sse() casts
* the result of x86_get_func() to this type and vs_sse_run() calls it with
* the exec machine's Inputs, Outputs, Consts and Temps, in that order.
*/
typedef void (XSTDCALL *codegen_function) (
const struct tgsi_exec_vector *input,
struct tgsi_exec_vector *output,
float (*constant)[4],
struct tgsi_exec_vector *temporary );
/* SSE2 flavour of draw_vertex_shader. */
struct draw_sse_vertex_shader {
/* Must stay the first member: vs_sse_run() and vs_sse_delete() cast the
* base pointer back to this struct.
*/
struct draw_vertex_shader base;
/* Code buffer filled by tgsi_emit_sse2(); released with x86_release_func(). */
struct x86_function sse2_program;
/* Entry point obtained from x86_get_func( &sse2_program ). */
codegen_function func;
};
/* Should be part of the generated shader:
*/
/**
 * Build the clip mask for one clip-space position.
 *
 * \param clip   clip-space position; components [0..3] are read
 * \param plane  array of plane equations; entries 6..nr-1 are tested with dot4()
 * \param nr     total number of planes, counting the six hardwired ones
 * \return bitmask with one bit set for every plane test that came out negative
 */
static INLINE unsigned
compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
{
   unsigned mask = 0;
   unsigned i;

   /* The six hardwired planes are tested directly against clip[3]. */
   if (-clip[0] + clip[3] < 0) {
      mask |= CLIP_RIGHT_BIT;
   }
   if ( clip[0] + clip[3] < 0) {
      mask |= CLIP_LEFT_BIT;
   }
   if (-clip[1] + clip[3] < 0) {
      mask |= CLIP_TOP_BIT;
   }
   if ( clip[1] + clip[3] < 0) {
      mask |= CLIP_BOTTOM_BIT;
   }
   if (-clip[2] + clip[3] < 0) {
      mask |= CLIP_FAR_BIT;
   }
   if ( clip[2] + clip[3] < 0) {
      mask |= CLIP_NEAR_BIT;
   }

   /* Any further planes come from plane[]; plane i owns bit i.  The shift is
    * done on an unsigned one so the bit never passes through a signed int.
    */
   for (i = 6; i < nr; i++) {
      if (dot4(clip, plane[i]) < 0) {
         mask |= (1u << i);
      }
   }

   return mask;
}
/**
 * Prepare hook of the SSE shader: only the vertex fetch state is refreshed.
 *
 * \param base  the shader (not referenced here)
 * \param draw  draw context whose fetch state is updated
 */
static void
vs_sse_prepare( struct draw_vertex_shader *base,
                struct draw_context *draw )
{
   draw_update_vertex_fetch( draw );
}
/**
 * Shade a batch of at most four vertices with the generated SSE2 code and
 * store the results in the output vertices.
 *
 * \param base   the shader; must have been created by draw_create_vs_sse()
 * \param draw   draw context supplying the exec machine, fetch function,
 *               viewport and clip planes
 * \param elts   indexes of the input vertices
 * \param count  number of vertices to shade, at most 4
 * \param vOut   array of pointers to the output vertices
 */
static void
vs_sse_run( struct draw_vertex_shader *base,
            struct draw_context *draw,
            const unsigned *elts,
            unsigned count,
            struct vertex_header *vOut[] )
{
   struct draw_sse_vertex_shader *sse_vs = (struct draw_sse_vertex_shader *)base;
   struct tgsi_exec_machine *machine = &draw->machine;
   const float *scale = draw->viewport.scale;
   const float *trans = draw->viewport.translate;
   unsigned vert;
   ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
   ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);

   assert(count <= 4);
   assert(draw->vertex_shader->state->output_semantic_name[0]
          == TGSI_SEMANTIC_POSITION);

   /* Only Inputs/Outputs get 16-byte aligned storage; Consts is used as is. */
   machine->Consts = (float (*)[4]) draw->user.constants;
   machine->Inputs = ALIGN16_ASSIGN(inputs);
   machine->Outputs = ALIGN16_ASSIGN(outputs);

   /* Vertex fetch is still a separate step.  Folding it into the generated
    * code would mean keeping several compiled variants of a shader, one
    * per fetch state.
    */
   draw->vertex_fetch.fetch_func( draw, machine, elts, count );

   /* Call the generated code. */
   sse_vs->func( machine->Inputs,
                 machine->Outputs,
                 machine->Consts,
                 machine->Temps );

   /* XXX: clipmask computation and result emission belong in the generated
    * program, as instructions appended to the user's code.
    */
   for (vert = 0; vert < count; vert++) {
      struct vertex_header *out = vOut[vert];
      unsigned attr, chan;
      float x, y, z, rhw;

      /* Output 0 is the position: keep the clip-space value as is. */
      for (chan = 0; chan < 4; chan++) {
         out->clip[chan] = machine->Outputs[0].xyzw[chan].f[vert];
      }

      out->clipmask = compute_clipmask(out->clip, draw->plane, draw->nr_planes);
      out->edgeflag = 1;

      /* Perspective divide. */
      rhw = 1.0f / out->clip[3];
      x = out->clip[0] * rhw;
      y = out->clip[1] * rhw;
      z = out->clip[2] * rhw;

      /* Viewport transform; the fourth component carries 1/w. */
      out->data[0][0] = x * scale[0] + trans[0];
      out->data[0][1] = y * scale[1] + trans[1];
      out->data[0][2] = z * scale[2] + trans[2];
      out->data[0][3] = rhw;

      /* The other outputs go unchanged into the following attrib slots. */
      for (attr = 1; attr < draw->num_vs_outputs; attr++) {
         for (chan = 0; chan < 4; chan++) {
            out->data[attr][chan] = machine->Outputs[attr].xyzw[chan].f[vert];
         }
      }
   }
}
/**
 * Destroy an SSE-backed vertex shader: release the generated code first,
 * then the struct that holds it.
 *
 * \param base  shader created by draw_create_vs_sse()
 */
static void
vs_sse_delete( struct draw_vertex_shader *base )
{
   struct draw_sse_vertex_shader *sse_vs;

   sse_vs = (struct draw_sse_vertex_shader *)base;

   x86_release_func( &sse_vs->sse2_program );
   FREE( sse_vs );
}
/**
 * Create a vertex shader that is run as generated SSE2 code.
 *
 * \param draw   draw context; nothing is created unless draw->use_sse is set
 * \param templ  shader state whose tokens are translated by tgsi_emit_sse2();
 *               the pointer is stored
 * \return the new shader, or NULL when SSE is disabled, the allocation
 *         fails, code generation fails or no entry point is available; the
 *         caller can then fall back to another implementation
 */
struct draw_vertex_shader *
draw_create_vs_sse(struct draw_context *draw,
                   const struct pipe_shader_state *templ)
{
   struct draw_sse_vertex_shader *vs;

   if (!draw->use_sse) {
      return NULL;
   }

   vs = CALLOC_STRUCT( draw_sse_vertex_shader );
   if (vs == NULL) {
      return NULL;
   }

   vs->base.state = templ;
   vs->base.prepare = vs_sse_prepare;
   vs->base.run = vs_sse_run;
   vs->base.delete = vs_sse_delete;

   x86_init_func( &vs->sse2_program );

   if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state->tokens,
                        &vs->sse2_program )) {
      goto fail;
   }

   /* vs_sse_run() calls func without checking it, so a shader without an
    * entry point must never be returned.
    */
   vs->func = (codegen_function) x86_get_func( &vs->sse2_program );
   if (vs->func == NULL) {
      goto fail;
   }

   return &vs->base;

fail:
   fprintf(stderr, "tgsi_emit_sse2() failed, falling back to interpreter\n");
   x86_release_func( &vs->sse2_program );
   FREE(vs);
   return NULL;
}
#else
/**
 * Stand-in used on non-x86 builds: no SSE shader can be created, so the
 * caller always gets NULL.
 */
struct draw_vertex_shader *
draw_create_vs_sse( struct draw_context *draw,
                    const struct pipe_shader_state *templ )
{
   (void) draw;
   (void) templ;

   return NULL;
}
#endif

View file

@ -159,6 +159,9 @@ VF_SOURCES = \
DRAW_SOURCES = \
$(TOP)/src/gallium/auxiliary/draw/draw_clip.c \
$(TOP)/src/gallium/auxiliary/draw/draw_vs_exec.c \
$(TOP)/src/gallium/auxiliary/draw/draw_vs_sse.c \
$(TOP)/src/gallium/auxiliary/draw/draw_vs_llvm.c \
$(TOP)/src/gallium/auxiliary/draw/draw_context.c\
$(TOP)/src/gallium/auxiliary/draw/draw_cull.c \
$(TOP)/src/gallium/auxiliary/draw/draw_debug.c \