draw: split off all the extra functionality in the vertex shader

This will at least allow us to make the initial gains to get decent
vertex performance much more quickly & with higher confidence of getting
it right.

At some later point we can look again at code-generating all the
fetch/cliptest/viewport extras in the same block as the vertex shader.
For now, just need to get some decent baseline performance.
This commit is contained in:
Keith Whitwell 2008-04-17 23:44:32 +01:00
parent 01b6354e72
commit a773f06e96
13 changed files with 635 additions and 118 deletions

View file

@ -20,8 +20,10 @@ C_SOURCES = \
draw_pt_fetch_emit.c \
draw_pt_fetch_pipeline.c \
draw_pt_fetch_shade_pipeline.c \
draw_pt_pipeline.c \
draw_pt_fetch.c \
draw_pt_post_vs.c \
draw_pt_emit.c \
draw_pt_pipeline.c \
draw_pt_elts.c \
draw_prim.c \
draw_pstipple.c \

View file

@ -110,6 +110,12 @@ struct draw_context *draw_create( void )
tgsi_exec_machine_init(&draw->machine);
/* FIXME: give this machine thing a proper constructor:
*/
draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
if (!draw_pt_init( draw ))
goto fail;
@ -155,8 +161,13 @@ void draw_destroy( struct draw_context *draw )
if (draw->pipeline.rasterize)
draw->pipeline.rasterize->destroy( draw->pipeline.rasterize );
if (draw->machine.Inputs)
align_free(draw->machine.Inputs);
if (draw->machine.Outputs)
align_free(draw->machine.Outputs);
tgsi_exec_machine_free_data(&draw->machine);
if (draw->vs.vertex_cache)
align_free( draw->vs.vertex_cache ); /* Frees all the vertices. */
@ -265,6 +276,7 @@ draw_set_vertex_elements(struct draw_context *draw,
draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
memcpy(draw->vertex_element, elements, count * sizeof(elements[0]));
draw->nr_vertex_elements = count;
}
@ -463,15 +475,3 @@ boolean draw_get_edgeflag( struct draw_context *draw,
return 1;
}
#if 0
/* Crufty init function. Fix me.
*/
boolean draw_init_machine( struct draw_context *draw )
{
ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS);
ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS);
machine->Inputs = ALIGN16_ASSIGN(inputs);
machine->Outputs = ALIGN16_ASSIGN(outputs);
}
#endif

View file

@ -224,6 +224,8 @@ struct draw_context
unsigned nr_vertex_buffers;
struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
unsigned nr_vertex_elements;
struct draw_vertex_shader *vertex_shader;
boolean identity_viewport;

View file

@ -112,6 +112,7 @@ struct draw_pt_middle_end {
* mode...
*/
struct vbuf_render;
struct vertex_header;
/* Helper functions.
@ -132,25 +133,25 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *d
*/
void draw_pt_run_pipeline( struct draw_context *draw,
unsigned prim,
char *verts,
unsigned vertex_stride,
struct vertex_header *verts,
unsigned vertex_count,
unsigned vertex_stride,
const ushort *elts,
unsigned count );
/* HW vertex emit:
/*******************************************************************************
* HW vertex emit:
*/
struct pt_emit;
void draw_pt_emit_prepare( struct pt_emit *emit,
unsigned prim,
unsigned opt );
unsigned prim );
void draw_pt_emit( struct pt_emit *emit,
char *verts,
unsigned stride,
const float (*vertex_data)[4],
unsigned vertex_count,
unsigned stride,
const ushort *elts,
unsigned count );
@ -159,6 +160,42 @@ void draw_pt_emit_destroy( struct pt_emit *emit );
struct pt_emit *draw_pt_emit_create( struct draw_context *draw );
/*******************************************************************************
* API vertex fetch:
*/
struct pt_fetch;
void draw_pt_fetch_prepare( struct pt_fetch *fetch,
boolean emit_header,
unsigned vertex_size );
void draw_pt_fetch_run( struct pt_fetch *fetch,
const unsigned *elts,
unsigned count,
char *verts );
void draw_pt_fetch_destroy( struct pt_fetch *fetch );
struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw );
/*******************************************************************************
* Post-VS: cliptest, rhw, viewport
*/
struct pt_post_vs;
/* Run the post-VS function selected by draw_pt_post_vs_prepare() over
 * 'count' vertices laid out 'stride' bytes apart, starting at
 * 'pipeline_verts'.
 *
 * The parameter order (count, then stride) matches the definition in
 * draw_pt_post_vs.c and the call in fetch_pipeline_run(); the caller
 * sets PT_PIPELINE when this returns TRUE.
 */
boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
                             struct vertex_header *pipeline_verts,
                             unsigned count,
                             unsigned stride );
void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
boolean bypass_clipping,
boolean identity_viewport,
boolean opengl );
struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw );
void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
#endif

View file

@ -38,16 +38,11 @@ struct pt_emit {
struct draw_context *draw;
struct translate *translate;
unsigned pipeline_vertex_size;
unsigned prim;
unsigned opt;
};
void draw_pt_emit_prepare( struct pt_emit *emit,
unsigned prim,
unsigned opt )
unsigned prim )
{
struct draw_context *draw = emit->draw;
const struct vertex_info *vinfo;
@ -75,8 +70,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
unsigned emit_sz = 0;
unsigned src_buffer = 0;
unsigned output_format;
unsigned src_offset = (sizeof(struct vertex_header) +
vinfo->src_index[i] * 4 * sizeof(float) );
unsigned src_offset = (vinfo->src_index[i] * 4 * sizeof(float) );
@ -139,9 +133,9 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
void draw_pt_emit( struct pt_emit *emit,
char *verts,
unsigned stride,
const float (*vertex_data)[4],
unsigned vertex_count,
unsigned stride,
const ushort *elts,
unsigned count )
{
@ -164,7 +158,7 @@ void draw_pt_emit( struct pt_emit *emit,
translate->set_buffer(translate,
0,
verts,
vertex_data,
stride );
translate->set_buffer(translate,

View file

@ -0,0 +1,175 @@
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include "pipe/p_util.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pt.h"
#include "translate/translate.h"
/* State for the API vertex fetch helper. */
struct pt_fetch {
/* Draw context, recorded by draw_pt_fetch_create(). */
struct draw_context *draw;
/* Translate object built (or rebuilt when the key changes) by
 * draw_pt_fetch_prepare() and used by draw_pt_fetch_run().
 */
struct translate *translate;
/* Value passed to draw_pt_fetch_prepare(); also used there as the
 * translate key's output stride.
 */
unsigned vertex_size;
};
/* Perform the fetch from API vertex elements & vertex buffers, to a
 * contiguous set of float[4] attributes as required for the
 * vertex_shader->run_linear() method.
 *
 * This is used in all cases except pure passthrough
 * (draw_pt_fetch_emit.c) which has its own version to translate
 * directly to hw vertices.
 *
 * draw_pt_fetch_prepare() builds the translate key describing that
 * layout and (re)creates fetch->translate when the key differs from
 * the one currently in use.
 *
 * \param fetch        fetch helper to configure
 * \param emit_header  if true, the first output element is a 32-bit
 *                     header word and room is left for clip[] before
 *                     the attributes
 * \param vertex_size  output stride in bytes; must be large enough for
 *                     everything laid out here (asserted)
 *
 * If the translate object cannot be created, fetch->translate is left
 * NULL and the function returns without touching it further.
 */
void draw_pt_fetch_prepare( struct pt_fetch *fetch,
                            boolean emit_header,
                            unsigned vertex_size )
{
   struct draw_context *draw = fetch->draw;
   unsigned i, nr = 0;
   unsigned dst_offset = 0;
   struct translate_key key;

   fetch->vertex_size = vertex_size;

   /* Zero the whole key first so that the memcmp() against the
    * previous key below is not disturbed by unused elements.
    */
   memset(&key, 0, sizeof(key));

   /* If PT_SHADE is not set, then we are creating post-shader
    * vertices, meaning that we need to emit/leave space for a vertex
    * header.
    *
    * It's worth considering whether the vertex headers should contain
    * a pointer to the 'data', rather than having it inline.
    * Something to look at after we've fully switched over to the pt
    * paths.
    */
   if (emit_header)
   {
      /* Need to set header->vertex_id = 0xffff somehow.
       *
       * Done by copying one 32-bit word from an extra input buffer
       * (index nr_vertex_buffers, bound to a static header below).
       */
      key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT;
      key.element[nr].input_buffer = draw->nr_vertex_buffers;
      key.element[nr].input_offset = 0;
      key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT;
      key.element[nr].output_offset = dst_offset;
      dst_offset += 1 * sizeof(float);
      nr++;

      /* Just leave the clip[] array untouched.
       */
      dst_offset += 4 * sizeof(float);
   }

   /* Every API vertex element is expanded to a float[4] slot. */
   for (i = 0; i < draw->nr_vertex_elements; i++) {
      key.element[nr].input_format = draw->vertex_element[i].src_format;
      key.element[nr].input_buffer = draw->vertex_element[i].vertex_buffer_index;
      key.element[nr].input_offset = draw->vertex_element[i].src_offset;
      key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
      key.element[nr].output_offset = dst_offset;
      dst_offset += 4 * sizeof(float);
      nr++;
   }

   assert(dst_offset <= vertex_size);

   key.nr_elements = nr;
   key.output_stride = vertex_size;

   /* Don't bother with caching at this stage:
    */
   if (!fetch->translate ||
       memcmp(&fetch->translate->key, &key, sizeof(key)) != 0)
   {
      if (fetch->translate)
         fetch->translate->release(fetch->translate);

      fetch->translate = translate_generic_create( &key );

      /* Creation can fail; do not dereference a NULL translate when
       * binding the header buffer.
       */
      if (!fetch->translate)
         return;

      if (emit_header) {
         /* Static, so the pointer handed to set_buffer() stays valid
          * for as long as the translate object is used.
          */
         static struct vertex_header vh = { 0, 0, 0, 0xffff };
         fetch->translate->set_buffer(fetch->translate,
                                      draw->nr_vertex_buffers,
                                      &vh,
                                      0);
      }
   }
}
/* Fetch 'count' vertices, selected by the indices in 'elts', into
 * 'verts' using the translate object built by draw_pt_fetch_prepare().
 *
 * Each vertex buffer is re-bound on every call, using the user buffer
 * pointer plus that buffer's offset and its pitch as stride.
 */
void draw_pt_fetch_run( struct pt_fetch *fetch,
                        const unsigned *elts,
                        unsigned count,
                        char *verts )
{
   struct draw_context *draw = fetch->draw;
   struct translate *xlate = fetch->translate;
   unsigned buf;

   for (buf = 0; buf < draw->nr_vertex_buffers; buf++) {
      char *base = (char *)draw->user.vbuffer[buf] +
                   draw->vertex_buffer[buf].buffer_offset;

      xlate->set_buffer( xlate, buf, base, draw->vertex_buffer[buf].pitch );
   }

   xlate->run_elts( xlate, elts, count, verts );
}
/* Allocate a fetch helper bound to 'draw'.
 *
 * Returns NULL if the allocation fails.  The translate object is not
 * created here; that happens in draw_pt_fetch_prepare().
 */
struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw )
{
   struct pt_fetch *self = CALLOC_STRUCT(pt_fetch);

   if (self != NULL)
      self->draw = draw;

   return self;
}
void draw_pt_fetch_destroy( struct pt_fetch *fetch )
{
FREE(fetch);
}

View file

@ -286,9 +286,9 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
*/
draw_pt_run_pipeline( fpme->draw,
fpme->prim,
pipeline_verts,
fpme->pipeline_vertex_size,
(struct vertex_header *)pipeline_verts,
fetch_count,
fpme->pipeline_vertex_size,
draw_elts,
draw_count );

View file

@ -39,8 +39,11 @@ struct fetch_pipeline_middle_end {
struct draw_context *draw;
struct pt_emit *emit;
struct pt_fetch *fetch;
struct pt_post_vs *post_vs;
unsigned pipeline_vertex_size;
unsigned vertex_data_offset;
unsigned vertex_size;
unsigned prim;
unsigned opt;
};
@ -51,15 +54,43 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
unsigned opt )
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
struct draw_vertex_shader *vs = draw->vertex_shader;
unsigned nr = MAX2( vs->info.num_inputs,
vs->info.num_outputs );
fpme->prim = prim;
fpme->opt = opt;
if (!(opt & PT_PIPELINE))
draw_pt_emit_prepare( fpme->emit, prim, opt );
/* Always leave room for the vertex header whether we need it or
* not. It's hard to get rid of it in particular because of the
* viewport code in draw_pt_post_vs.c.
*/
fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
draw_pt_fetch_prepare( fpme->fetch,
(opt & (PT_CLIPTEST | PT_PIPELINE)) != 0,
fpme->vertex_size );
/* XXX: it's not really gl rasterization rules we care about here,
* but gl vs dx9 clip spaces.
*/
draw_pt_post_vs_prepare( fpme->post_vs,
draw->rasterizer->bypass_clipping,
draw->identity_viewport,
draw->rasterizer->gl_rasterization_rules );
if (!(opt & PT_PIPELINE))
draw_pt_emit_prepare( fpme->emit,
prim );
/* Prepare the vertex shader.
*/
vs->prepare(vs, draw);
//fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
fpme->pipeline_vertex_size = MAX_VERTEX_ALLOCATION;
}
@ -74,44 +105,63 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
struct draw_vertex_shader *shader = draw->vertex_shader;
char *pipeline_verts;
unsigned pipeline = PT_PIPELINE;
unsigned opt = fpme->opt;
pipeline_verts = MALLOC(fpme->pipeline_vertex_size *
fetch_count);
struct vertex_header *pipeline_verts =
(struct vertex_header *)MALLOC(fpme->vertex_size * fetch_count);
if (!pipeline_verts) {
assert(0);
return;
}
/* Shade
/* Fetch into our vertex buffer
*/
shader->prepare(shader, draw);
draw_pt_fetch_run( fpme->fetch,
fetch_elts,
fetch_count,
(char *)pipeline_verts );
if (shader->run(shader, draw, fetch_elts, fetch_count, pipeline_verts,
fpme->pipeline_vertex_size))
/* Run the shader, note that this overwrites the data[] parts of
* the pipeline verts. If there is no shader, ie a bypass shader,
* then the inputs == outputs, and are already in the correct
* place.
*/
if (opt & PT_SHADE)
{
pipeline |= PT_CLIPTEST;
shader->run_linear(shader,
(const float (*)[4])pipeline_verts->data,
( float (*)[4])pipeline_verts->data,
(const float (*)[4])draw->user.constants,
fetch_count,
fpme->vertex_size,
fpme->vertex_size);
}
if (draw_pt_post_vs_run( fpme->post_vs,
pipeline_verts,
fetch_count,
fpme->vertex_size ))
{
opt |= PT_PIPELINE;
}
/* Do we need to run the pipeline?
*/
if (fpme->opt & pipeline) {
if (opt & PT_PIPELINE) {
draw_pt_run_pipeline( fpme->draw,
fpme->prim,
pipeline_verts,
fpme->pipeline_vertex_size,
fetch_count,
fpme->vertex_size,
draw_elts,
draw_count );
} else {
}
else {
draw_pt_emit( fpme->emit,
pipeline_verts,
fpme->pipeline_vertex_size,
(const float (*)[4])pipeline_verts->data,
fetch_count,
fpme->vertex_size,
draw_elts,
draw_count );
}
@ -129,6 +179,17 @@ static void fetch_pipeline_finish( struct draw_pt_middle_end *middle )
/* Destroy the middle end together with whichever of its fetch, emit
 * and post-VS helpers were created.
 */
static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle )
{
   struct fetch_pipeline_middle_end *self =
      (struct fetch_pipeline_middle_end *)middle;

   if (self->fetch != NULL)
      draw_pt_fetch_destroy( self->fetch );

   if (self->emit != NULL)
      draw_pt_emit_destroy( self->emit );

   if (self->post_vs != NULL)
      draw_pt_post_vs_destroy( self->post_vs );

   FREE(middle);
}
@ -146,6 +207,14 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *
fpme->draw = draw;
fpme->fetch = draw_pt_fetch_create( draw );
if (!fpme->fetch)
goto fail;
fpme->post_vs = draw_pt_post_vs_create( draw );
if (!fpme->post_vs)
goto fail;
fpme->emit = draw_pt_emit_create( draw );
if (!fpme->emit)
goto fail;

View file

@ -117,12 +117,13 @@ void draw_pt_reset_vertex_ids( struct draw_context *draw )
*/
void draw_pt_run_pipeline( struct draw_context *draw,
unsigned prim,
char *verts,
unsigned stride,
struct vertex_header *pipeline_verts,
unsigned vertex_count,
unsigned stride,
const ushort *elts,
unsigned count )
{
char *verts = (char *)pipeline_verts;
unsigned i;
draw->pt.pipeline.verts = verts;

View file

@ -0,0 +1,202 @@
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include "pipe/p_util.h"
#include "pipe/p_context.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pt.h"
/* State for the post-vertex-shader stage (cliptest, rhw, viewport). */
struct pt_post_vs {
/* Draw context, recorded by draw_pt_post_vs_create(). */
struct draw_context *draw;
/* Function selected by draw_pt_post_vs_prepare() and invoked by
 * draw_pt_post_vs_run().  'count' vertices are processed, 'stride'
 * bytes apart.
 */
boolean (*run)( struct pt_post_vs *pvs,
struct vertex_header *vertices,
unsigned count,
unsigned stride );
};
/* Build the clip mask for one clip-space position.
 *
 * \param clip   position as x, y, z, w
 * \param plane  clip planes; only entries 6..nr-1 are read here
 * \param nr     total number of planes, including the six hardwired ones
 * \return       OR of the CLIP_*_BIT flags for the hardwired planes
 *               plus bit i for each remaining plane i whose dot
 *               product with 'clip' is negative
 */
static INLINE unsigned
compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr)
{
   unsigned bits = 0x0;
   unsigned p = 6;

   /* The six hardwired planes, in the same order as before.
    */
   if (-clip[0] + clip[3] < 0)
      bits |= CLIP_RIGHT_BIT;
   if ( clip[0] + clip[3] < 0)
      bits |= CLIP_LEFT_BIT;
   if (-clip[1] + clip[3] < 0)
      bits |= CLIP_TOP_BIT;
   if ( clip[1] + clip[3] < 0)
      bits |= CLIP_BOTTOM_BIT;
   if (-clip[2] + clip[3] < 0)
      bits |= CLIP_FAR_BIT;
   if ( clip[2] + clip[3] < 0)
      bits |= CLIP_NEAR_BIT;

   /* Any planes beyond the first six.
    */
   while (p < nr) {
      if (dot4(clip, plane[p]) < 0)
         bits |= (1<<p);
      p++;
   }

   return bits;
}
/* The normal case - cliptest, rhw divide, viewport transform.
*
* Also handle identity viewport here at the expense of a few wasted
* instructions
*/
static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
struct vertex_header *vertices,
unsigned count,
unsigned stride )
{
struct vertex_header *out = vertices;
const float *scale = pvs->draw->viewport.scale;
const float *trans = pvs->draw->viewport.translate;
unsigned j;
unsigned clipped = 0;
for (j = 0; j < count; j++) {
out->clip[0] = out->data[0][0];
out->clip[1] = out->data[0][1];
out->clip[2] = out->data[0][2];
out->clip[3] = out->data[0][3];
out->vertex_id = 0xffff;
out->edgeflag = 1;
out->clipmask = compute_clipmask_gl(out->clip,
pvs->draw->plane,
pvs->draw->nr_planes);
clipped += out->clipmask;
if (out->clipmask == 0)
{
/* divide by w */
float w = 1.0f / out->data[0][3];
/* Viewport mapping */
out->data[0][0] = out->data[0][0] * w * scale[0] + trans[0];
out->data[0][1] = out->data[0][1] * w * scale[1] + trans[1];
out->data[0][2] = out->data[0][2] * w * scale[2] + trans[2];
out->data[0][3] = w;
}
out = (struct vertex_header *)( (char *)out + stride );
}
return clipped != 0;
}
/* Used when bypass_clipping is set but the viewport is not identity:
 * apply only the viewport scale/translate to x, y and z of data[0].
 * No cliptest and no divide by w; data[0][3] is left alone.
 *
 * Always returns FALSE.
 */
static boolean post_vs_viewport( struct pt_post_vs *pvs,
                                 struct vertex_header *vertices,
                                 unsigned count,
                                 unsigned stride )
{
   const float *scale = pvs->draw->viewport.scale;
   const float *trans = pvs->draw->viewport.translate;
   char *cursor = (char *)vertices;
   unsigned remaining;

   debug_printf("%s\n", __FUNCTION__);

   /* Walk the vertices by byte stride. */
   for (remaining = count; remaining != 0; remaining--, cursor += stride) {
      float *pos = ((struct vertex_header *)cursor)->data[0];
      unsigned axis;

      for (axis = 0; axis < 3; axis++)
         pos[axis] = pos[axis] * scale[axis] + trans[axis];
   }

   return FALSE;
}
/* Used when bypass_clipping is set and the viewport is identity: the
 * vertices are left untouched.  Only the function name is logged.
 *
 * Always returns FALSE.
 */
static boolean post_vs_none( struct pt_post_vs *pvs,
                             struct vertex_header *vertices,
                             unsigned count,
                             unsigned stride )
{
   /* None of the arguments are needed here. */
   (void) pvs;
   (void) vertices;
   (void) count;
   (void) stride;

   debug_printf("%s\n", __FUNCTION__);

   return FALSE;
}
/* Public entry point: forward to the function chosen by
 * draw_pt_post_vs_prepare() and hand back its result unchanged.
 */
boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
                             struct vertex_header *pipeline_verts,
                             unsigned count,
                             unsigned stride )
{
   boolean result = pvs->run( pvs, pipeline_verts, count, stride );

   return result;
}
/* Choose the post-VS function for the current state:
 *
 *   clipping enabled                      -> cliptest + divide + viewport
 *   clipping bypassed, identity viewport  -> nothing
 *   clipping bypassed, other viewport     -> viewport only
 *
 * 'opengl' is currently not consulted; the GL cliptest variant is
 * used whenever clipping is enabled.
 */
void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
                              boolean bypass_clipping,
                              boolean identity_viewport,
                              boolean opengl )
{
   (void) opengl;

   if (!bypass_clipping) {
      pvs->run = post_vs_cliptest_viewport_gl;
      return;
   }

   pvs->run = identity_viewport ? post_vs_none : post_vs_viewport;
}
/* Allocate a post-VS helper bound to 'draw'.
 *
 * Returns NULL if the allocation fails.  The run function is not set
 * here; draw_pt_post_vs_prepare() selects it.
 */
struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw )
{
   struct pt_post_vs *self = CALLOC_STRUCT( pt_post_vs );

   if (self != NULL)
      self->draw = draw;

   return self;
}
/* Free a post-VS helper created by draw_pt_post_vs_create().  The
 * helper only holds a pointer to the draw context and a function
 * pointer, so freeing the struct itself is all that is done.
 */
void draw_pt_post_vs_destroy( struct pt_post_vs *pvs )
{
   FREE( pvs );
}

View file

@ -58,8 +58,10 @@ static void
vs_exec_prepare( struct draw_vertex_shader *shader,
struct draw_context *draw )
{
struct exec_vertex_shader *evs = exec_vertex_shader(shader);
/* specify the vertex program to interpret/execute */
tgsi_exec_machine_bind_shader(&draw->machine,
tgsi_exec_machine_bind_shader(evs->machine,
shader->state.tokens,
PIPE_MAX_SAMPLERS,
NULL /*samplers*/ );
@ -84,31 +86,45 @@ vs_exec_run( struct draw_vertex_shader *shader,
void *vOut,
unsigned vertex_size)
{
struct tgsi_exec_machine *machine = &draw->machine;
struct exec_vertex_shader *evs = exec_vertex_shader(shader);
struct tgsi_exec_machine *machine = evs->machine;
unsigned int i, j;
unsigned int clipped = 0;
ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS);
ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS);
struct tgsi_exec_vector *outputs = 0;
const float *scale = draw->viewport.scale;
const float *trans = draw->viewport.translate;
assert(shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION);
machine->Consts = (const float (*)[4]) draw->user.constants;
machine->Inputs = ALIGN16_ASSIGN(inputs);
if (draw->rasterizer->bypass_vs) {
/* outputs are just the inputs */
machine->Outputs = machine->Inputs;
outputs = machine->Inputs;
}
else {
machine->Outputs = ALIGN16_ASSIGN(outputs);
outputs = machine->Outputs;
}
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
draw->vertex_fetch.fetch_func( draw, machine, &elts[i], max_vertices );
#if 0
for (j = 0; j < max_vertices; j++) {
unsigned slot;
debug_printf("%d) Input vert:\n", i + j);
for (slot = 0; slot < shader->info.num_inputs; slot++) {
debug_printf("\t%d: %f %f %f %f\n", slot,
machine->Inputs[slot].xyzw[0].f[j],
machine->Inputs[slot].xyzw[1].f[j],
machine->Inputs[slot].xyzw[2].f[j],
machine->Inputs[slot].xyzw[3].f[j]);
}
}
#endif
if (!draw->rasterizer->bypass_vs) {
/* run interpreter */
tgsi_exec_machine_run( machine );
@ -127,10 +143,10 @@ vs_exec_run( struct draw_vertex_shader *shader,
* program as a set of DP4 instructions appended to the
* user-provided code.
*/
x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j];
y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j];
z = out->clip[2] = machine->Outputs[0].xyzw[2].f[j];
w = out->clip[3] = machine->Outputs[0].xyzw[3].f[j];
x = out->clip[0] = outputs[0].xyzw[0].f[j];
y = out->clip[1] = outputs[0].xyzw[1].f[j];
z = out->clip[2] = outputs[0].xyzw[2].f[j];
w = out->clip[3] = outputs[0].xyzw[3].f[j];
if (!draw->rasterizer->bypass_clipping) {
out->clipmask = compute_clipmask(out->clip, draw->plane,
@ -156,7 +172,8 @@ vs_exec_run( struct draw_vertex_shader *shader,
out->data[0][2] = z * scale[2] + trans[2];
out->data[0][3] = w;
}
else {
else
{
out->data[0][0] = x;
out->data[0][1] = y;
out->data[0][2] = z;
@ -167,10 +184,10 @@ vs_exec_run( struct draw_vertex_shader *shader,
* vertex attrib slots.
*/
for (slot = 1; slot < draw->num_vs_outputs; slot++) {
out->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
out->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
out->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
out->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
out->data[slot][0] = outputs[slot].xyzw[0].f[j];
out->data[slot][1] = outputs[slot].xyzw[1].f[j];
out->data[slot][2] = outputs[slot].xyzw[2].f[j];
out->data[slot][3] = outputs[slot].xyzw[3].f[j];
}
#if 0 /*DEBUG*/
@ -216,12 +233,25 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
/* Swizzle inputs.
*/
for (j = 0; j < max_vertices; j++) {
#if 0
debug_printf("%d) Input vert:\n", i + j);
for (slot = 0; slot < shader->info.num_inputs; slot++) {
debug_printf("\t%d: %f %f %f %f\n", slot,
input[slot][0],
input[slot][1],
input[slot][2],
input[slot][3]);
}
#endif
for (slot = 0; slot < shader->info.num_inputs; slot++) {
machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
}
input = (const float (*)[4])((const char *)input + input_stride);
}
/* run interpreter */
@ -235,13 +265,23 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
}
#if 0
debug_printf("%d) Post xform vert:\n", i + j);
for (slot = 0; slot < shader->info.num_outputs; slot++) {
debug_printf("\t%d: %f %f %f %f\n", slot,
output[slot][0],
output[slot][1],
output[slot][2],
output[slot][3]);
}
#endif
output = (float (*)[4])((char *)output + output_stride);
}
/* Advance input, output pointers:
*/
input = (const float (*)[4])((const char *)input + input_stride);
output = (float (*)[4])((char *)output + output_stride);
}
}

View file

@ -47,6 +47,7 @@
struct draw_llvm_vertex_shader {
struct draw_vertex_shader base;
struct gallivm_prog *llvm_prog;
struct tgsi_exec_machine *machine;
};
@ -77,12 +78,9 @@ vs_llvm_run( struct draw_vertex_shader *base,
struct draw_llvm_vertex_shader *shader =
(struct draw_llvm_vertex_shader *)base;
struct tgsi_exec_machine *machine = &draw->machine;
struct tgsi_exec_machine *machine = shader->machine;
unsigned int j;
unsigned int clipped = 0;
ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS);
ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS);
const float *scale = draw->viewport.scale;
const float *trans = draw->viewport.translate;
@ -93,13 +91,12 @@ vs_llvm_run( struct draw_vertex_shader *base,
/* Consts does not require 16 byte alignment. */
machine->Consts = (float (*)[4]) draw->user.constants;
machine->Inputs = ALIGN16_ASSIGN(inputs);
if (draw->rasterizer->bypass_vs) {
/* outputs are just the inputs */
machine->Outputs = machine->Inputs;
outputs = machine->Inputs;
}
else {
machine->Outputs = ALIGN16_ASSIGN(outputs);
outputs = machine->Outputs;
}
@ -119,10 +116,10 @@ vs_llvm_run( struct draw_vertex_shader *base,
unsigned slot;
float x, y, z, w;
x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
x = vOut[j]->clip[0] = outputs[0].xyzw[0].f[j];
y = vOut[j]->clip[1] = outputs[0].xyzw[1].f[j];
z = vOut[j]->clip[2] = outputs[0].xyzw[2].f[j];
w = vOut[j]->clip[3] = outputs[0].xyzw[3].f[j];
if (!draw->rasterizer->bypass_clipping) {
vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane,
@ -159,10 +156,10 @@ vs_llvm_run( struct draw_vertex_shader *base,
* vertex attrib slots.
*/
for (slot = 1; slot < draw->num_vs_outputs; slot++) {
vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
vOut[j]->data[slot][0] = outputs[slot].xyzw[0].f[j];
vOut[j]->data[slot][1] = outputs[slot].xyzw[1].f[j];
vOut[j]->data[slot][2] = outputs[slot].xyzw[2].f[j];
vOut[j]->data[slot][3] = outputs[slot].xyzw[3].f[j];
}
} /* loop over vertices */
return clipped != 0;
@ -183,7 +180,7 @@ vs_llvm_run_linear( struct draw_vertex_shader *base,
struct draw_llvm_vertex_shader *shader =
(struct draw_llvm_vertex_shader *)base;
struct tgsi_exec_machine *machine = &draw->machine;
struct tgsi_exec_machine *machine = shader->machine;
unsigned int j;
@ -199,6 +196,8 @@ vs_llvm_run_linear( struct draw_vertex_shader *base,
machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
}
input = (const float (*)[4])((const char *)input + input_stride);
}
/* run shader */
@ -216,12 +215,9 @@ vs_llvm_run_linear( struct draw_vertex_shader *base,
output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
}
/* Advance input, output pointers:
*/
input = (const float (*)[4])((const char *)input + input_stride);
output = (float (*)[4])((char *)output + output_stride);
output = (float (*)[4])((char *)output + output_stride);
}
}
}
@ -263,6 +259,7 @@ draw_create_vs_llvm(struct draw_context *draw,
vs->base.run = vs_llvm_run;
vs->base.run_linear = vs_llvm_run_linear;
vs->base.delete = vs_llvm_delete;
vs->machine = &draw->machine;
{
struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS);

View file

@ -91,12 +91,10 @@ vs_sse_run( struct draw_vertex_shader *base,
unsigned vertex_size )
{
struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
struct tgsi_exec_machine *machine = &draw->machine;
struct tgsi_exec_machine *machine = shader->machine;
unsigned int i, j;
unsigned int clipped = 0;
ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS);
ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS);
struct tgsi_exec_vector *outputs = 0;
const float *scale = draw->viewport.scale;
const float *trans = draw->viewport.translate;
@ -104,13 +102,13 @@ vs_sse_run( struct draw_vertex_shader *base,
/* Consts does not require 16 byte alignment. */
machine->Consts = (const float (*)[4]) draw->user.constants;
machine->Inputs = ALIGN16_ASSIGN(inputs);
if (draw->rasterizer->bypass_vs) {
/* outputs are just the inputs */
machine->Outputs = machine->Inputs;
outputs = machine->Inputs;
}
else {
machine->Outputs = ALIGN16_ASSIGN(outputs);
outputs = machine->Outputs;
}
for (i = 0; i < count; i += SSE_MAX_VERTICES) {
@ -142,10 +140,10 @@ vs_sse_run( struct draw_vertex_shader *base,
struct vertex_header *out =
draw_header_from_block(vOut, vertex_size, i + j);
x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j];
y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j];
z = out->clip[2] = machine->Outputs[0].xyzw[2].f[j];
w = out->clip[3] = machine->Outputs[0].xyzw[3].f[j];
x = out->clip[0] = outputs[0].xyzw[0].f[j];
y = out->clip[1] = outputs[0].xyzw[1].f[j];
z = out->clip[2] = outputs[0].xyzw[2].f[j];
w = out->clip[3] = outputs[0].xyzw[3].f[j];
if (!draw->rasterizer->bypass_clipping) {
out->clipmask = compute_clipmask(out->clip, draw->plane,
@ -182,10 +180,10 @@ vs_sse_run( struct draw_vertex_shader *base,
* vertex attrib slots.
*/
for (slot = 1; slot < draw->num_vs_outputs; slot++) {
out->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
out->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
out->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
out->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
out->data[slot][0] = outputs[slot].xyzw[0].f[j];
out->data[slot][1] = outputs[slot].xyzw[1].f[j];
out->data[slot][2] = outputs[slot].xyzw[2].f[j];
out->data[slot][3] = outputs[slot].xyzw[3].f[j];
}
#if 0 /*DEBUG*/
printf("%d) Post xform vert:\n", i + j);
@ -233,6 +231,8 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
}
input = (const float (*)[4])((const char *)input + input_stride);
}
/* run compiled shader
@ -253,12 +253,9 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
}
}
/* Advance input, output pointers:
*/
input = (const float (*)[4])((const char *)input + input_stride);
output = (float (*)[4])((char *)output + output_stride);
output = (float (*)[4])((char *)output + output_stride);
}
}
}
@ -300,6 +297,7 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.run = vs_sse_run;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
vs->machine = &draw->machine;
x86_init_func( &vs->sse2_program );