Move vertex transformation/shader code into draw_prim.c to avoid need for vs_flush() function pointer.

This commit is contained in:
Brian 2007-08-20 18:53:41 -06:00
parent 3772441c49
commit 3cb6fc3f9a
3 changed files with 270 additions and 272 deletions

View file

@ -40,271 +40,6 @@
#include "pipe/draw/draw_context.h"
#include "pipe/draw/draw_prim.h"
#include "pipe/tgsi/core/tgsi_exec.h"
#include "pipe/tgsi/core/tgsi_build.h"
#include "pipe/tgsi/core/tgsi_util.h"
/**
 * Vertex shader output numbering used by run_vertex_program() when it
 * tests bits of vertex_shader.outputs_written.
 * XXX remove
 */
#define VERT_RESULT_HPOS 0   /* bit asserted to be set: the position output */
#define VERT_RESULT_MAX 24   /* exclusive upper bound of the output-attribute loop */
/**
 * Compute the clip outcode for one clip-space position.
 *
 * One flag is set for every plane of the volume -cw <= cx,cy,cz <= cw
 * that the point lies outside of.
 *
 * \param cx  clip-space x
 * \param cy  clip-space y
 * \param cz  clip-space z
 * \param cw  clip-space w
 * \return OR of the flags of all violated planes, 0 if there are none
 */
static INLINE unsigned
compute_clipmask(float cx, float cy, float cz, float cw)
{
   unsigned outcode;

#if defined(macintosh) || defined(__powerpc__)
   /* Branch-free variant: on powerpc the clip test is 17% faster this way.
    * NOTE(review): presumably CLIP_x_BIT == 1 << CLIP_x_SHIFT -- confirm.
    */
   outcode  = (((cw <  cx) << CLIP_RIGHT_SHIFT));
   outcode |= (((cw < -cx) << CLIP_LEFT_SHIFT));
   outcode |= (((cw <  cy) << CLIP_TOP_SHIFT));
   outcode |= (((cw < -cy) << CLIP_BOTTOM_SHIFT));
   outcode |= (((cw <  cz) << CLIP_FAR_SHIFT));
   outcode |= (((cw < -cz) << CLIP_NEAR_SHIFT));
#else /* !defined(macintosh)) */
   outcode = 0x0;

   /* x planes */
   if (-cx + cw < 0) {
      outcode |= CLIP_RIGHT_BIT;
   }
   if ( cx + cw < 0) {
      outcode |= CLIP_LEFT_BIT;
   }

   /* y planes */
   if (-cy + cw < 0) {
      outcode |= CLIP_TOP_BIT;
   }
   if ( cy + cw < 0) {
      outcode |= CLIP_BOTTOM_BIT;
   }

   /* z planes */
   if (-cz + cw < 0) {
      outcode |= CLIP_FAR_BIT;
   }
   if ( cz + cw < 0) {
      outcode |= CLIP_NEAR_BIT;
   }
#endif /* defined(macintosh) */

   return outcode;
}
/**
 * Fetch a float[4] vertex attribute from memory, doing format/type
 * conversion as needed.
 *
 * Components that the source format does not supply take the defaults
 * (0, 0, 0, 1).  Only 32-bit float formats with one to four components
 * are handled; any other format trips the assert.  In builds where
 * assert() is compiled out the caller still receives fully initialized
 * data, namely the defaults.
 * XXX this might be a temporary thing.
 *
 * \param ptr     address of the attribute's first component
 * \param format  PIPE_FORMAT_x token describing the data at \p ptr
 * \param attrib  receives the four expanded components
 */
static void
fetch_attrib4(const void *ptr, unsigned format, float attrib[4])
{
   const float *src = (const float *) ptr;

   /* defaults -- all four, so that no component is ever left
    * indeterminate on the unsupported-format path.
    */
   attrib[0] = 0.0;
   attrib[1] = 0.0;
   attrib[2] = 0.0;
   attrib[3] = 1.0;

   switch (format) {
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      attrib[3] = src[3];
      /* fall-through */
   case PIPE_FORMAT_R32G32B32_FLOAT:
      attrib[2] = src[2];
      /* fall-through */
   case PIPE_FORMAT_R32G32_FLOAT:
      attrib[1] = src[1];
      /* fall-through */
   case PIPE_FORMAT_R32_FLOAT:
      attrib[0] = src[0];
      break;
   default:
      /* unsupported vertex format */
      assert(0);
   }
}
/**
 * Transform vertices with the current vertex program/shader.
 * Up to four vertices can be shaded at a time.
 *
 * For each vertex the shader's position output is stored as the clip
 * coordinate, used to compute the clipmask, then divided by w and mapped
 * through the viewport into data[0].  The other written outputs are
 * packed into data[1], data[2], ... in increasing output order.
 *
 * \param draw   draw context supplying the shader, the vertex
 *               buffers/elements and the viewport
 * \param elts   indexes of the (up to four) input vertices
 * \param count  number of vertices to shade [1..4]
 * \param vOut   array of pointers to the output vertices; the first
 *               \p count entries are written
 */
static void
run_vertex_program(struct draw_context *draw,
                   unsigned elts[4], unsigned count,
                   struct vertex_header *vOut[])
{
   struct tgsi_exec_machine machine;
   unsigned int j;

   ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
   ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);
   const float *scale = draw->viewport.scale;
   const float *trans = draw->viewport.translate;

   assert(count <= 4);

#ifdef DEBUG
   memset( &machine, 0, sizeof( machine ) );
#endif

   /* init machine state */
   tgsi_exec_machine_init(&machine,
                          draw->vertex_shader.tokens,
                          PIPE_MAX_SAMPLERS,
                          NULL /*samplers*/ );

   /* Consts does not require 16 byte alignment. */
   machine.Consts = draw->vertex_shader.constants->constant;

   machine.Inputs = ALIGN16_ASSIGN(inputs);
   machine.Outputs = ALIGN16_ASSIGN(outputs);

   /* Disabled debug dump of the vertex array layout. */
   if (0)
   {
      unsigned attr;
      for (attr = 0; attr < 16; attr++) {
         if (draw->vertex_shader.inputs_read & (1 << attr)) {
            /* Look the buffer up through the element's buffer index, the
             * same way the fetch loop below does.
             */
            unsigned buf = draw->vertex_element[attr].vertex_buffer_index;
            printf("attr %d: buf_off %d src_off %d pitch %d\n",
                   attr,
                   draw->vertex_buffer[buf].buffer_offset,
                   draw->vertex_element[attr].src_offset,
                   draw->vertex_buffer[buf].pitch);
         }
      }
   }

   /* load machine inputs */
   for (j = 0; j < count; j++) {
      unsigned attr;
      /* only the low 16 bits of inputs_read are examined */
      for (attr = 0; attr < 16; attr++) {
         if (draw->vertex_shader.inputs_read & (1 << attr)) {
            unsigned buf = draw->vertex_element[attr].vertex_buffer_index;
            const void *src
               = (const void *) ((const ubyte *) draw->mapped_vbuffer[buf]
                                 + draw->vertex_buffer[buf].buffer_offset
                                 + draw->vertex_element[attr].src_offset
                                 + elts[j] * draw->vertex_buffer[buf].pitch);
            float p[4];

            fetch_attrib4(src, draw->vertex_element[attr].src_format, p);

            /* vertex j occupies lane j of each channel vector */
            machine.Inputs[attr].xyzw[0].f[j] = p[0]; /*X*/
            machine.Inputs[attr].xyzw[1].f[j] = p[1]; /*Y*/
            machine.Inputs[attr].xyzw[2].f[j] = p[2]; /*Z*/
            machine.Inputs[attr].xyzw[3].f[j] = p[3]; /*W*/
#if 0
            if (attr == 0) {
               printf("Input vertex %d: %f %f %f\n",
                      j, p[0], p[1], p[2]);
            }
#endif
         }
      }
   }

#if 0
   {
      unsigned i;
      printf("Consts:\n");
      for (i = 0; i < 4; i++) {
         printf(" %d: %f %f %f %f\n", i,
                machine.Consts[i][0],
                machine.Consts[i][1],
                machine.Consts[i][2],
                machine.Consts[i][3]);
      }
   }
#endif

   /* run shader */
   tgsi_exec_machine_run( &machine );

#if 0
   printf("VS result: %f %f %f %f\n",
          machine.Outputs[0].xyzw[0].f[0],
          machine.Outputs[0].xyzw[1].f[0],
          machine.Outputs[0].xyzw[2].f[0],
          machine.Outputs[0].xyzw[3].f[0]);
#endif

   /* store machine results */
   assert(draw->vertex_shader.outputs_written & (1 << VERT_RESULT_HPOS));

   /* Results are read through machine.Outputs, i.e. the pointer that was
    * actually handed to the interpreter, mirroring how the inputs were
    * written through machine.Inputs.
    * NOTE(review): ALIGN16_ASSIGN() is defined elsewhere and may yield an
    * address other than the raw array -- confirm.
    */
   for (j = 0; j < count; j++) {
      unsigned attr, slot;
      float x, y, z, w;

      /* Handle attr[0] (position) specially: */
      x = vOut[j]->clip[0] = machine.Outputs[0].xyzw[0].f[j];
      y = vOut[j]->clip[1] = machine.Outputs[0].xyzw[1].f[j];
      z = vOut[j]->clip[2] = machine.Outputs[0].xyzw[2].f[j];
      w = vOut[j]->clip[3] = machine.Outputs[0].xyzw[3].f[j];

      /* the clip test uses the coordinates before the divide */
      vOut[j]->clipmask = compute_clipmask(x, y, z, w);
      vOut[j]->edgeflag = 1;

      /* divide by w */
      w = 1.0 / w;
      x *= w;
      y *= w;
      z *= w;

      /* Viewport mapping; the fourth component keeps 1/w */
      vOut[j]->data[0][0] = x * scale[0] + trans[0];
      vOut[j]->data[0][1] = y * scale[1] + trans[1];
      vOut[j]->data[0][2] = z * scale[2] + trans[2];
      vOut[j]->data[0][3] = w;
#if 0
      printf("wincoord: %f %f %f %f\n",
             vOut[j]->data[0][0],
             vOut[j]->data[0][1],
             vOut[j]->data[0][2],
             vOut[j]->data[0][3]);
#endif

      /* remaining attributes: */
      /* pack into sequential post-transform attrib slots */
      slot = 1;
      for (attr = 1; attr < VERT_RESULT_MAX; attr++) {
         if (draw->vertex_shader.outputs_written & (1 << attr)) {
            assert(slot < draw->nr_attrs);
            vOut[j]->data[slot][0] = machine.Outputs[attr].xyzw[0].f[j];
            vOut[j]->data[slot][1] = machine.Outputs[attr].xyzw[1].f[j];
            vOut[j]->data[slot][2] = machine.Outputs[attr].xyzw[2].f[j];
            vOut[j]->data[slot][3] = machine.Outputs[attr].xyzw[3].f[j];
            slot++;
         }
      }
   }
}
/**
 * Called by the draw module when the vertex cache needs to be flushed.
 * This involves running the vertex shader.
 *
 * The queued vertices are shaded in batches of up to four, after which
 * the queue is marked empty.
 *
 * \param draw  draw context whose vs.queue is to be processed
 */
static void vs_flush( struct draw_context *draw )
{
   unsigned i, j;

   /* run vertex shader on vertex cache entries, four per invocation */
   for (i = 0; i < draw->vs.queue_nr; i += 4) {
      struct vertex_header *dests[4] = { NULL, NULL, NULL, NULL };
      unsigned elts[4] = { 0, 0, 0, 0 };
      /* the last batch may hold fewer than four vertices */
      const unsigned n = MIN2(4, draw->vs.queue_nr - i);

      assert(n > 0);
      assert(n <= 4);

      /* Copy only the entries that are really queued, so that nothing at
       * or beyond queue_nr is ever read.
       */
      for (j = 0; j < n; j++) {
         elts[j] = draw->vs.queue[i + j].elt;
         dests[j] = draw->vs.queue[i + j].dest;
      }

      run_vertex_program(draw, elts, n, dests);
   }

   draw->vs.queue_nr = 0;
}
/**
 * Tell the drawing context where a mapped vertex buffer can be found.
 *
 * The pointer is only recorded here.  Note that run_vertex_program()
 * looks these slots up by vertex_element[].vertex_buffer_index.
 *
 * \param draw    the draw context
 * \param attr    slot in draw->mapped_vbuffer[] to set
 * \param buffer  address of the mapped buffer contents
 */
void
draw_set_mapped_vertex_buffer(struct draw_context *draw,
                              unsigned attr, const void *buffer)
{
   draw->mapped_vbuffer[attr] = buffer;
}
/**
* Draw vertex arrays
@ -320,8 +55,6 @@ draw_arrays(struct draw_context *draw, unsigned prim,
/* tell drawing pipeline we're beginning drawing */
draw->pipeline.first->begin( draw->pipeline.first );
draw->vs_flush = vs_flush;
draw_invalidate_vcache( draw );
draw_set_prim( draw, prim );

View file

@ -35,6 +35,10 @@
#include "draw_context.h"
#include "draw_prim.h"
#include "pipe/tgsi/core/tgsi_exec.h"
#include "pipe/tgsi/core/tgsi_build.h"
#include "pipe/tgsi/core/tgsi_util.h"
#define RP_NONE 0
#define RP_POINT 1
@ -56,6 +60,261 @@ static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
};
/**
 * Vertex shader output numbering used by run_vertex_program() when it
 * tests bits of vertex_shader.outputs_written.
 * XXX remove
 */
#define VERT_RESULT_HPOS 0   /* bit asserted to be set: the position output */
#define VERT_RESULT_MAX 24   /* exclusive upper bound of the output-attribute loop */
/**
 * Compute the clip outcode for one clip-space position.
 *
 * One flag is set for every plane of the volume -cw <= cx,cy,cz <= cw
 * that the point lies outside of.
 *
 * \param cx  clip-space x
 * \param cy  clip-space y
 * \param cz  clip-space z
 * \param cw  clip-space w
 * \return OR of the flags of all violated planes, 0 if there are none
 */
static INLINE unsigned
compute_clipmask(float cx, float cy, float cz, float cw)
{
   unsigned outcode;

#if defined(macintosh) || defined(__powerpc__)
   /* Branch-free variant: on powerpc the clip test is 17% faster this way.
    * NOTE(review): presumably CLIP_x_BIT == 1 << CLIP_x_SHIFT -- confirm.
    */
   outcode  = (((cw <  cx) << CLIP_RIGHT_SHIFT));
   outcode |= (((cw < -cx) << CLIP_LEFT_SHIFT));
   outcode |= (((cw <  cy) << CLIP_TOP_SHIFT));
   outcode |= (((cw < -cy) << CLIP_BOTTOM_SHIFT));
   outcode |= (((cw <  cz) << CLIP_FAR_SHIFT));
   outcode |= (((cw < -cz) << CLIP_NEAR_SHIFT));
#else /* !defined(macintosh)) */
   outcode = 0x0;

   /* x planes */
   if (-cx + cw < 0) {
      outcode |= CLIP_RIGHT_BIT;
   }
   if ( cx + cw < 0) {
      outcode |= CLIP_LEFT_BIT;
   }

   /* y planes */
   if (-cy + cw < 0) {
      outcode |= CLIP_TOP_BIT;
   }
   if ( cy + cw < 0) {
      outcode |= CLIP_BOTTOM_BIT;
   }

   /* z planes */
   if (-cz + cw < 0) {
      outcode |= CLIP_FAR_BIT;
   }
   if ( cz + cw < 0) {
      outcode |= CLIP_NEAR_BIT;
   }
#endif /* defined(macintosh) */

   return outcode;
}
/**
 * Fetch a float[4] vertex attribute from memory, doing format/type
 * conversion as needed.
 *
 * Components that the source format does not supply take the defaults
 * (0, 0, 0, 1).  Only 32-bit float formats with one to four components
 * are handled; any other format trips the assert.  In builds where
 * assert() is compiled out the caller still receives fully initialized
 * data, namely the defaults.
 * XXX this might be a temporary thing.
 *
 * \param ptr     address of the attribute's first component
 * \param format  PIPE_FORMAT_x token describing the data at \p ptr
 * \param attrib  receives the four expanded components
 */
static void
fetch_attrib4(const void *ptr, unsigned format, float attrib[4])
{
   const float *src = (const float *) ptr;

   /* defaults -- all four, so that no component is ever left
    * indeterminate on the unsupported-format path.
    */
   attrib[0] = 0.0;
   attrib[1] = 0.0;
   attrib[2] = 0.0;
   attrib[3] = 1.0;

   switch (format) {
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      attrib[3] = src[3];
      /* fall-through */
   case PIPE_FORMAT_R32G32B32_FLOAT:
      attrib[2] = src[2];
      /* fall-through */
   case PIPE_FORMAT_R32G32_FLOAT:
      attrib[1] = src[1];
      /* fall-through */
   case PIPE_FORMAT_R32_FLOAT:
      attrib[0] = src[0];
      break;
   default:
      /* unsupported vertex format */
      assert(0);
   }
}
/**
 * Transform vertices with the current vertex program/shader.
 * Up to four vertices can be shaded at a time.
 *
 * For each vertex the shader's position output is stored as the clip
 * coordinate, used to compute the clipmask, then divided by w and mapped
 * through the viewport into data[0].  The other written outputs are
 * packed into data[1], data[2], ... in increasing output order.
 *
 * \param draw   draw context supplying the shader, the vertex
 *               buffers/elements and the viewport
 * \param elts   indexes of the (up to four) input vertices
 * \param count  number of vertices to shade [1..4]
 * \param vOut   array of pointers to the output vertices; the first
 *               \p count entries are written
 */
static void
run_vertex_program(struct draw_context *draw,
                   unsigned elts[4], unsigned count,
                   struct vertex_header *vOut[])
{
   struct tgsi_exec_machine machine;
   unsigned int j;

   ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
   ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);
   const float *scale = draw->viewport.scale;
   const float *trans = draw->viewport.translate;

   assert(count <= 4);

#ifdef DEBUG
   memset( &machine, 0, sizeof( machine ) );
#endif

   /* init machine state */
   tgsi_exec_machine_init(&machine,
                          draw->vertex_shader.tokens,
                          PIPE_MAX_SAMPLERS,
                          NULL /*samplers*/ );

   /* Consts does not require 16 byte alignment. */
   machine.Consts = draw->vertex_shader.constants->constant;

   machine.Inputs = ALIGN16_ASSIGN(inputs);
   machine.Outputs = ALIGN16_ASSIGN(outputs);

   /* Disabled debug dump of the vertex array layout. */
   if (0)
   {
      unsigned attr;
      for (attr = 0; attr < 16; attr++) {
         if (draw->vertex_shader.inputs_read & (1 << attr)) {
            /* Look the buffer up through the element's buffer index, the
             * same way the fetch loop below does.
             */
            unsigned buf = draw->vertex_element[attr].vertex_buffer_index;
            printf("attr %d: buf_off %d src_off %d pitch %d\n",
                   attr,
                   draw->vertex_buffer[buf].buffer_offset,
                   draw->vertex_element[attr].src_offset,
                   draw->vertex_buffer[buf].pitch);
         }
      }
   }

   /* load machine inputs */
   for (j = 0; j < count; j++) {
      unsigned attr;
      /* only the low 16 bits of inputs_read are examined */
      for (attr = 0; attr < 16; attr++) {
         if (draw->vertex_shader.inputs_read & (1 << attr)) {
            unsigned buf = draw->vertex_element[attr].vertex_buffer_index;
            const void *src
               = (const void *) ((const ubyte *) draw->mapped_vbuffer[buf]
                                 + draw->vertex_buffer[buf].buffer_offset
                                 + draw->vertex_element[attr].src_offset
                                 + elts[j] * draw->vertex_buffer[buf].pitch);
            float p[4];

            fetch_attrib4(src, draw->vertex_element[attr].src_format, p);

            /* vertex j occupies lane j of each channel vector */
            machine.Inputs[attr].xyzw[0].f[j] = p[0]; /*X*/
            machine.Inputs[attr].xyzw[1].f[j] = p[1]; /*Y*/
            machine.Inputs[attr].xyzw[2].f[j] = p[2]; /*Z*/
            machine.Inputs[attr].xyzw[3].f[j] = p[3]; /*W*/
#if 0
            if (attr == 0) {
               printf("Input vertex %d: %f %f %f\n",
                      j, p[0], p[1], p[2]);
            }
#endif
         }
      }
   }

#if 0
   {
      unsigned i;
      printf("Consts:\n");
      for (i = 0; i < 4; i++) {
         printf(" %d: %f %f %f %f\n", i,
                machine.Consts[i][0],
                machine.Consts[i][1],
                machine.Consts[i][2],
                machine.Consts[i][3]);
      }
   }
#endif

   /* run shader */
   tgsi_exec_machine_run( &machine );

#if 0
   printf("VS result: %f %f %f %f\n",
          machine.Outputs[0].xyzw[0].f[0],
          machine.Outputs[0].xyzw[1].f[0],
          machine.Outputs[0].xyzw[2].f[0],
          machine.Outputs[0].xyzw[3].f[0]);
#endif

   /* store machine results */
   assert(draw->vertex_shader.outputs_written & (1 << VERT_RESULT_HPOS));

   /* Results are read through machine.Outputs, i.e. the pointer that was
    * actually handed to the interpreter, mirroring how the inputs were
    * written through machine.Inputs.
    * NOTE(review): ALIGN16_ASSIGN() is defined elsewhere and may yield an
    * address other than the raw array -- confirm.
    */
   for (j = 0; j < count; j++) {
      unsigned attr, slot;
      float x, y, z, w;

      /* Handle attr[0] (position) specially: */
      x = vOut[j]->clip[0] = machine.Outputs[0].xyzw[0].f[j];
      y = vOut[j]->clip[1] = machine.Outputs[0].xyzw[1].f[j];
      z = vOut[j]->clip[2] = machine.Outputs[0].xyzw[2].f[j];
      w = vOut[j]->clip[3] = machine.Outputs[0].xyzw[3].f[j];

      /* the clip test uses the coordinates before the divide */
      vOut[j]->clipmask = compute_clipmask(x, y, z, w);
      vOut[j]->edgeflag = 1;

      /* divide by w */
      w = 1.0 / w;
      x *= w;
      y *= w;
      z *= w;

      /* Viewport mapping; the fourth component keeps 1/w */
      vOut[j]->data[0][0] = x * scale[0] + trans[0];
      vOut[j]->data[0][1] = y * scale[1] + trans[1];
      vOut[j]->data[0][2] = z * scale[2] + trans[2];
      vOut[j]->data[0][3] = w;
#if 0
      printf("wincoord: %f %f %f %f\n",
             vOut[j]->data[0][0],
             vOut[j]->data[0][1],
             vOut[j]->data[0][2],
             vOut[j]->data[0][3]);
#endif

      /* remaining attributes: */
      /* pack into sequential post-transform attrib slots */
      slot = 1;
      for (attr = 1; attr < VERT_RESULT_MAX; attr++) {
         if (draw->vertex_shader.outputs_written & (1 << attr)) {
            assert(slot < draw->nr_attrs);
            vOut[j]->data[slot][0] = machine.Outputs[attr].xyzw[0].f[j];
            vOut[j]->data[slot][1] = machine.Outputs[attr].xyzw[1].f[j];
            vOut[j]->data[slot][2] = machine.Outputs[attr].xyzw[2].f[j];
            vOut[j]->data[slot][3] = machine.Outputs[attr].xyzw[3].f[j];
            slot++;
         }
      }
   }
}
/**
 * Called by the draw module when the vertex cache needs to be flushed.
 * This involves running the vertex shader.
 *
 * The queued vertices are shaded in batches of up to four, after which
 * the queue is marked empty.
 *
 * \param draw  draw context whose vs.queue is to be processed
 */
static void transform_vertices( struct draw_context *draw )
{
   unsigned i, j;

   /* run vertex shader on vertex cache entries, four per invocation */
   for (i = 0; i < draw->vs.queue_nr; i += 4) {
      struct vertex_header *dests[4] = { NULL, NULL, NULL, NULL };
      unsigned elts[4] = { 0, 0, 0, 0 };
      /* the last batch may hold fewer than four vertices */
      const unsigned n = MIN2(4, draw->vs.queue_nr - i);

      assert(n > 0);
      assert(n <= 4);

      /* Copy only the entries that are really queued, so that nothing at
       * or beyond queue_nr is ever read.
       */
      for (j = 0; j < n; j++) {
         elts[j] = draw->vs.queue[i + j].elt;
         dests[j] = draw->vs.queue[i + j].dest;
      }

      run_vertex_program(draw, elts, n, dests);
   }

   draw->vs.queue_nr = 0;
}
void draw_flush( struct draw_context *draw )
{
struct draw_stage *first = draw->pipeline.first;
@ -63,9 +322,7 @@ void draw_flush( struct draw_context *draw )
/* Make sure all vertices are available:
*/
assert(draw->vs_flush);
draw->vs_flush( draw );
transform_vertices(draw);
switch (draw->reduced_prim) {
case RP_TRI:
@ -459,6 +716,16 @@ draw_set_mapped_element_buffer( struct draw_context *draw,
}
/**
 * Tell the drawing context where a mapped vertex buffer can be found.
 *
 * The pointer is only recorded here.  Note that run_vertex_program()
 * looks these slots up by vertex_element[].vertex_buffer_index.
 *
 * \param draw    the draw context
 * \param attr    slot in draw->mapped_vbuffer[] to set
 * \param buffer  address of the mapped buffer contents
 */
void
draw_set_mapped_vertex_buffer(struct draw_context *draw,
                              unsigned attr, const void *buffer)
{
   draw->mapped_vbuffer[attr] = buffer;
}
unsigned
draw_prim_info(unsigned prim, unsigned *first, unsigned *incr)
{

View file

@ -186,8 +186,6 @@ struct draw_context
unsigned prim; /**< current prim type: PIPE_PRIM_x */
unsigned reduced_prim;
void (*vs_flush)( struct draw_context *draw );
struct vertex_header *(*get_vertex)( struct draw_context *draw,
unsigned i );