llvmpipe: expand quad pipeline to process >1 quad at a time

This is part one -- we still only pass a single quad down, but
the code can now cope with more.  The quads must all be from the same
tile.
This commit is contained in:
Keith Whitwell 2009-07-24 16:49:35 +01:00 committed by José Fonseca
parent 4486012245
commit bdbb4beb21
14 changed files with 760 additions and 648 deletions

View file

@ -9,76 +9,80 @@
#include "pipe/p_defines.h"
#include "util/u_memory.h"
static void
alpha_test_quad(struct quad_stage *qs, struct quad_header *quad)
{
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
const float ref = llvmpipe->depth_stencil->alpha.ref_value;
unsigned passMask = 0x0, j;
const uint cbuf = 0; /* only output[0].alpha is tested */
const float *aaaa = quad->output.color[cbuf][3];
switch (llvmpipe->depth_stencil->alpha.func) {
case PIPE_FUNC_NEVER:
break;
case PIPE_FUNC_LESS:
/*
* If mask were an array [4] we could do this SIMD-style:
* passMask = (quad->outputs.color[0][3] <= vec4(ref));
*/
for (j = 0; j < QUAD_SIZE; j++) {
if (aaaa[j] < ref) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_EQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
if (aaaa[j] == ref) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_LEQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
if (aaaa[j] <= ref) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_GREATER:
for (j = 0; j < QUAD_SIZE; j++) {
if (aaaa[j] > ref) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_NOTEQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
if (aaaa[j] != ref) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_GEQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
if (aaaa[j] >= ref) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_ALWAYS:
passMask = MASK_ALL;
break;
default:
assert(0);
/**
 * Define alpha_test_quads_<FUNC>(): an alpha-test pass over a batch of quads
 * that compares fragment alpha against the reference value with the C
 * comparison operator COMP.
 *
 * For every quad the four alpha values of color output 0 are tested and the
 * inout.mask bits of failing fragments are cleared.  Quads that still have
 * a non-zero mask are packed, in order, at the front of quads[]; if any
 * remain they are handed to the next stage in a single call.
 */
#define ALPHATEST( FUNC, COMP )                                         \
   static void                                                          \
   alpha_test_quads_##FUNC( struct quad_stage *qs,                      \
                            struct quad_header *quads[],                \
                            unsigned nr )                               \
   {                                                                    \
      const uint cbuf = 0; /* only output[0].alpha is tested */         \
      const float ref = qs->llvmpipe->depth_stencil->alpha.ref_value;   \
      unsigned kept = 0;                                                \
      unsigned q, j;                                                    \
                                                                        \
      for (q = 0; q < nr; q++) {                                        \
         struct quad_header *quad = quads[q];                           \
         const float *alpha = quad->output.color[cbuf][3];              \
         unsigned live = 0;                                             \
                                                                        \
         /* one mask bit per fragment that passes the comparison */     \
         for (j = 0; j < 4; j++) {                                      \
            if (alpha[j] COMP ref)                                      \
               live |= (1 << j);                                        \
         }                                                              \
                                                                        \
         quad->inout.mask &= live;                                      \
                                                                        \
         /* compact surviving quads toward the front of the array */    \
         if (quad->inout.mask)                                          \
            quads[kept++] = quad;                                       \
      }                                                                 \
                                                                        \
      if (kept)                                                         \
         qs->next->run(qs->next, quads, kept);                          \
   }
quad->inout.mask &= passMask;
if (quad->inout.mask)
qs->next->run(qs->next, quad);
/* Instantiate one alpha_test_quads_<FUNC>() routine per comparison operator. */
ALPHATEST( LESS,     < )
ALPHATEST( EQUAL,    == )
ALPHATEST( LEQUAL,   <= )
ALPHATEST( GREATER,  > )
ALPHATEST( NOTEQUAL, != )
ALPHATEST( GEQUAL,   >= )
/* XXX: Incorporate into shader using KILP.
*/
static void
alpha_test_quad(struct quad_stage *qs,
struct quad_header *quads[],
unsigned nr)
{
switch (qs->llvmpipe->depth_stencil->alpha.func) {
case PIPE_FUNC_LESS:
alpha_test_quads_LESS( qs, quads, nr );
break;
case PIPE_FUNC_EQUAL:
alpha_test_quads_EQUAL( qs, quads, nr );
break;
case PIPE_FUNC_LEQUAL:
alpha_test_quads_LEQUAL( qs, quads, nr );
break;
case PIPE_FUNC_GREATER:
alpha_test_quads_GREATER( qs, quads, nr );
break;
case PIPE_FUNC_NOTEQUAL:
alpha_test_quads_NOTEQUAL( qs, quads, nr );
break;
case PIPE_FUNC_GEQUAL:
alpha_test_quads_GEQUAL( qs, quads, nr );
break;
case PIPE_FUNC_ALWAYS:
assert(0); /* should be caught earlier */
qs->next->run(qs->next, quads, nr);
break;
case PIPE_FUNC_NEVER:
default:
assert(0); /* should be caught earlier */
return;
}
}

View file

@ -117,10 +117,16 @@ do { \
static void
logicop_quad(struct quad_stage *qs, struct quad_header *quad)
logicop_quad(struct quad_stage *qs,
struct quad_header *quads[],
unsigned nr)
{
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
uint cbuf;
struct llvmpipe_cached_tile *
tile = lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf],
quads[0]->input.x0,
quads[0]->input.y0);
/* loop over colorbuffer outputs */
for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
@ -129,165 +135,161 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad)
uint *src4 = (uint *) src;
uint *dst4 = (uint *) dst;
uint *res4 = (uint *) res;
struct llvmpipe_cached_tile *
tile = lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf],
quad->input.x0, quad->input.y0);
float (*quadColor)[4] = quad->output.color[cbuf];
uint i, j;
/* get/swizzle dest colors */
for (j = 0; j < QUAD_SIZE; j++) {
int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
for (i = 0; i < 4; i++) {
dest[i][j] = tile->data.color[y][x][i];
for (i = 0; i < nr; i++) {
struct quad_header *quad = quads[i];
float (*quadColor)[4] = quad->output.color[cbuf];
/* get/swizzle dest colors */
for (j = 0; j < QUAD_SIZE; j++) {
int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
for (i = 0; i < 4; i++) {
dest[i][j] = tile->data.color[y][x][i];
}
}
/* convert to ubyte */
for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
}
switch (llvmpipe->blend->logicop_func) {
case PIPE_LOGICOP_CLEAR:
for (j = 0; j < 4; j++)
res4[j] = 0;
break;
case PIPE_LOGICOP_NOR:
for (j = 0; j < 4; j++)
res4[j] = ~(src4[j] | dst4[j]);
break;
case PIPE_LOGICOP_AND_INVERTED:
for (j = 0; j < 4; j++)
res4[j] = ~src4[j] & dst4[j];
break;
case PIPE_LOGICOP_COPY_INVERTED:
for (j = 0; j < 4; j++)
res4[j] = ~src4[j];
break;
case PIPE_LOGICOP_AND_REVERSE:
for (j = 0; j < 4; j++)
res4[j] = src4[j] & ~dst4[j];
break;
case PIPE_LOGICOP_INVERT:
for (j = 0; j < 4; j++)
res4[j] = ~dst4[j];
break;
case PIPE_LOGICOP_XOR:
for (j = 0; j < 4; j++)
res4[j] = dst4[j] ^ src4[j];
break;
case PIPE_LOGICOP_NAND:
for (j = 0; j < 4; j++)
res4[j] = ~(src4[j] & dst4[j]);
break;
case PIPE_LOGICOP_AND:
for (j = 0; j < 4; j++)
res4[j] = src4[j] & dst4[j];
break;
case PIPE_LOGICOP_EQUIV:
for (j = 0; j < 4; j++)
res4[j] = ~(src4[j] ^ dst4[j]);
break;
case PIPE_LOGICOP_NOOP:
for (j = 0; j < 4; j++)
res4[j] = dst4[j];
break;
case PIPE_LOGICOP_OR_INVERTED:
for (j = 0; j < 4; j++)
res4[j] = ~src4[j] | dst4[j];
break;
case PIPE_LOGICOP_COPY:
for (j = 0; j < 4; j++)
res4[j] = src4[j];
break;
case PIPE_LOGICOP_OR_REVERSE:
for (j = 0; j < 4; j++)
res4[j] = src4[j] | ~dst4[j];
break;
case PIPE_LOGICOP_OR:
for (j = 0; j < 4; j++)
res4[j] = src4[j] | dst4[j];
break;
case PIPE_LOGICOP_SET:
for (j = 0; j < 4; j++)
res4[j] = ~0;
break;
default:
assert(0);
}
for (j = 0; j < 4; j++) {
quadColor[j][0] = ubyte_to_float(res[j][0]);
quadColor[j][1] = ubyte_to_float(res[j][1]);
quadColor[j][2] = ubyte_to_float(res[j][2]);
quadColor[j][3] = ubyte_to_float(res[j][3]);
}
}
/* convert to ubyte */
for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
}
switch (llvmpipe->blend->logicop_func) {
case PIPE_LOGICOP_CLEAR:
for (j = 0; j < 4; j++)
res4[j] = 0;
break;
case PIPE_LOGICOP_NOR:
for (j = 0; j < 4; j++)
res4[j] = ~(src4[j] | dst4[j]);
break;
case PIPE_LOGICOP_AND_INVERTED:
for (j = 0; j < 4; j++)
res4[j] = ~src4[j] & dst4[j];
break;
case PIPE_LOGICOP_COPY_INVERTED:
for (j = 0; j < 4; j++)
res4[j] = ~src4[j];
break;
case PIPE_LOGICOP_AND_REVERSE:
for (j = 0; j < 4; j++)
res4[j] = src4[j] & ~dst4[j];
break;
case PIPE_LOGICOP_INVERT:
for (j = 0; j < 4; j++)
res4[j] = ~dst4[j];
break;
case PIPE_LOGICOP_XOR:
for (j = 0; j < 4; j++)
res4[j] = dst4[j] ^ src4[j];
break;
case PIPE_LOGICOP_NAND:
for (j = 0; j < 4; j++)
res4[j] = ~(src4[j] & dst4[j]);
break;
case PIPE_LOGICOP_AND:
for (j = 0; j < 4; j++)
res4[j] = src4[j] & dst4[j];
break;
case PIPE_LOGICOP_EQUIV:
for (j = 0; j < 4; j++)
res4[j] = ~(src4[j] ^ dst4[j]);
break;
case PIPE_LOGICOP_NOOP:
for (j = 0; j < 4; j++)
res4[j] = dst4[j];
break;
case PIPE_LOGICOP_OR_INVERTED:
for (j = 0; j < 4; j++)
res4[j] = ~src4[j] | dst4[j];
break;
case PIPE_LOGICOP_COPY:
for (j = 0; j < 4; j++)
res4[j] = src4[j];
break;
case PIPE_LOGICOP_OR_REVERSE:
for (j = 0; j < 4; j++)
res4[j] = src4[j] | ~dst4[j];
break;
case PIPE_LOGICOP_OR:
for (j = 0; j < 4; j++)
res4[j] = src4[j] | dst4[j];
break;
case PIPE_LOGICOP_SET:
for (j = 0; j < 4; j++)
res4[j] = ~0;
break;
default:
assert(0);
}
for (j = 0; j < 4; j++) {
quadColor[j][0] = ubyte_to_float(res[j][0]);
quadColor[j][1] = ubyte_to_float(res[j][1]);
quadColor[j][2] = ubyte_to_float(res[j][2]);
quadColor[j][3] = ubyte_to_float(res[j][3]);
}
}
/* pass quad to next stage */
qs->next->run(qs->next, quad);
}
static void
blend_quad(struct quad_stage *qs, struct quad_header *quad)
blend_quads(struct quad_stage *qs,
struct quad_header *quads[],
unsigned nr)
{
static const float zero[4] = { 0, 0, 0, 0 };
static const float one[4] = { 1, 1, 1, 1 };
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
uint cbuf;
if (llvmpipe->blend->logicop_enable) {
logicop_quad(qs, quad);
return;
}
/* loop over colorbuffer outputs */
for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
float source[4][QUAD_SIZE], dest[4][QUAD_SIZE];
struct llvmpipe_cached_tile *tile
= lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf],
quad->input.x0, quad->input.y0);
float (*quadColor)[4] = quad->output.color[cbuf];
uint i, j;
quads[0]->input.x0,
quads[0]->input.y0);
uint q, i, j;
/* get/swizzle dest colors */
for (j = 0; j < QUAD_SIZE; j++) {
int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
for (i = 0; i < 4; i++) {
dest[i][j] = tile->data.color[y][x][i];
for (q = 0; q < nr; q++) {
struct quad_header *quad = quads[q];
float (*quadColor)[4] = quad->output.color[cbuf];
/* get/swizzle dest colors */
for (j = 0; j < QUAD_SIZE; j++) {
int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
for (i = 0; i < 4; i++) {
dest[i][j] = tile->data.color[y][x][i];
}
}
}
/*
* Compute src/first term RGB
*/
switch (llvmpipe->blend->rgb_src_factor) {
case PIPE_BLENDFACTOR_ONE:
VEC4_COPY(source[0], quadColor[0]); /* R */
VEC4_COPY(source[1], quadColor[1]); /* G */
VEC4_COPY(source[2], quadColor[2]); /* B */
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
break;
case PIPE_BLENDFACTOR_SRC_ALPHA:
/*
* Compute src/first term RGB
*/
switch (llvmpipe->blend->rgb_src_factor) {
case PIPE_BLENDFACTOR_ONE:
VEC4_COPY(source[0], quadColor[0]); /* R */
VEC4_COPY(source[1], quadColor[1]); /* G */
VEC4_COPY(source[2], quadColor[2]); /* B */
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
break;
case PIPE_BLENDFACTOR_SRC_ALPHA:
{
const float *alpha = quadColor[3];
VEC4_MUL(source[0], quadColor[0], alpha); /* R */
@ -295,12 +297,12 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], alpha); /* B */
}
break;
case PIPE_BLENDFACTOR_DST_COLOR:
VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
break;
case PIPE_BLENDFACTOR_DST_ALPHA:
case PIPE_BLENDFACTOR_DST_COLOR:
VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
break;
case PIPE_BLENDFACTOR_DST_ALPHA:
{
const float *alpha = dest[3];
VEC4_MUL(source[0], quadColor[0], alpha); /* R */
@ -308,7 +310,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], alpha); /* B */
}
break;
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
{
const float *alpha = quadColor[3];
float diff[4], temp[4];
@ -319,7 +321,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], temp); /* B */
}
break;
case PIPE_BLENDFACTOR_CONST_COLOR:
case PIPE_BLENDFACTOR_CONST_COLOR:
{
float comp[4];
VEC4_SCALAR(comp, llvmpipe->blend_color.color[0]); /* R */
@ -330,7 +332,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], comp); /* B */
}
break;
case PIPE_BLENDFACTOR_CONST_ALPHA:
case PIPE_BLENDFACTOR_CONST_ALPHA:
{
float alpha[4];
VEC4_SCALAR(alpha, llvmpipe->blend_color.color[3]);
@ -339,18 +341,18 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], alpha); /* B */
}
break;
case PIPE_BLENDFACTOR_SRC1_COLOR:
assert(0); /* to do */
break;
case PIPE_BLENDFACTOR_SRC1_ALPHA:
assert(0); /* to do */
break;
case PIPE_BLENDFACTOR_ZERO:
VEC4_COPY(source[0], zero); /* R */
VEC4_COPY(source[1], zero); /* G */
VEC4_COPY(source[2], zero); /* B */
break;
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
case PIPE_BLENDFACTOR_SRC1_COLOR:
assert(0); /* to do */
break;
case PIPE_BLENDFACTOR_SRC1_ALPHA:
assert(0); /* to do */
break;
case PIPE_BLENDFACTOR_ZERO:
VEC4_COPY(source[0], zero); /* R */
VEC4_COPY(source[1], zero); /* G */
VEC4_COPY(source[2], zero); /* B */
break;
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
{
float inv_comp[4];
VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
@ -361,7 +363,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
}
break;
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
{
float inv_alpha[4];
VEC4_SUB(inv_alpha, one, quadColor[3]);
@ -370,7 +372,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
}
break;
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
{
float inv_alpha[4];
VEC4_SUB(inv_alpha, one, dest[3]);
@ -379,7 +381,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
}
break;
case PIPE_BLENDFACTOR_INV_DST_COLOR:
case PIPE_BLENDFACTOR_INV_DST_COLOR:
{
float inv_comp[4];
VEC4_SUB(inv_comp, one, dest[0]); /* R */
@ -390,7 +392,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
}
break;
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
{
float inv_comp[4];
/* R */
@ -404,7 +406,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], inv_comp);
}
break;
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
{
float inv_alpha[4];
VEC4_SCALAR(inv_alpha, 1.0f - llvmpipe->blend_color.color[3]);
@ -413,73 +415,73 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
}
break;
case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
assert(0); /* to do */
break;
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
assert(0); /* to do */
break;
default:
assert(0);
}
case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
assert(0); /* to do */
break;
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
assert(0); /* to do */
break;
default:
assert(0);
}
/*
* Compute src/first term A
*/
switch (llvmpipe->blend->alpha_src_factor) {
case PIPE_BLENDFACTOR_ONE:
VEC4_COPY(source[3], quadColor[3]); /* A */
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_SRC_ALPHA:
/*
* Compute src/first term A
*/
switch (llvmpipe->blend->alpha_src_factor) {
case PIPE_BLENDFACTOR_ONE:
VEC4_COPY(source[3], quadColor[3]); /* A */
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_SRC_ALPHA:
{
const float *alpha = quadColor[3];
VEC4_MUL(source[3], quadColor[3], alpha); /* A */
}
break;
case PIPE_BLENDFACTOR_DST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_DST_ALPHA:
VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
break;
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
/* multiply alpha by 1.0 */
VEC4_COPY(source[3], quadColor[3]); /* A */
break;
case PIPE_BLENDFACTOR_CONST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_CONST_ALPHA:
case PIPE_BLENDFACTOR_DST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_DST_ALPHA:
VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
break;
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
/* multiply alpha by 1.0 */
VEC4_COPY(source[3], quadColor[3]); /* A */
break;
case PIPE_BLENDFACTOR_CONST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_CONST_ALPHA:
{
float comp[4];
VEC4_SCALAR(comp, llvmpipe->blend_color.color[3]); /* A */
VEC4_MUL(source[3], quadColor[3], comp); /* A */
}
break;
case PIPE_BLENDFACTOR_ZERO:
VEC4_COPY(source[3], zero); /* A */
break;
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
case PIPE_BLENDFACTOR_ZERO:
VEC4_COPY(source[3], zero); /* A */
break;
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
{
float inv_alpha[4];
VEC4_SUB(inv_alpha, one, quadColor[3]);
VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
}
break;
case PIPE_BLENDFACTOR_INV_DST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
case PIPE_BLENDFACTOR_INV_DST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
{
float inv_alpha[4];
VEC4_SUB(inv_alpha, one, dest[3]);
VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
}
break;
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
{
float inv_comp[4];
/* A */
@ -487,42 +489,42 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(source[3], quadColor[3], inv_comp);
}
break;
default:
assert(0);
}
default:
assert(0);
}
/*
* Compute dest/second term RGB
*/
switch (llvmpipe->blend->rgb_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
/* dest = dest * 1 NO-OP, leave dest as-is */
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */
VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */
VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */
break;
case PIPE_BLENDFACTOR_SRC_ALPHA:
VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */
VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */
VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */
break;
case PIPE_BLENDFACTOR_DST_ALPHA:
VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */
VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */
VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */
break;
case PIPE_BLENDFACTOR_DST_COLOR:
VEC4_MUL(dest[0], dest[0], dest[0]); /* R */
VEC4_MUL(dest[1], dest[1], dest[1]); /* G */
VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
break;
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
assert(0); /* illegal */
break;
case PIPE_BLENDFACTOR_CONST_COLOR:
/*
* Compute dest/second term RGB
*/
switch (llvmpipe->blend->rgb_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
/* dest = dest * 1 NO-OP, leave dest as-is */
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */
VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */
VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */
break;
case PIPE_BLENDFACTOR_SRC_ALPHA:
VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */
VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */
VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */
break;
case PIPE_BLENDFACTOR_DST_ALPHA:
VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */
VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */
VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */
break;
case PIPE_BLENDFACTOR_DST_COLOR:
VEC4_MUL(dest[0], dest[0], dest[0]); /* R */
VEC4_MUL(dest[1], dest[1], dest[1]); /* G */
VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
break;
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
assert(0); /* illegal */
break;
case PIPE_BLENDFACTOR_CONST_COLOR:
{
float comp[4];
VEC4_SCALAR(comp, llvmpipe->blend_color.color[0]); /* R */
@ -533,7 +535,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], dest[2], comp); /* B */
}
break;
case PIPE_BLENDFACTOR_CONST_ALPHA:
case PIPE_BLENDFACTOR_CONST_ALPHA:
{
float comp[4];
VEC4_SCALAR(comp, llvmpipe->blend_color.color[3]); /* A */
@ -542,17 +544,17 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], dest[2], comp); /* B */
}
break;
case PIPE_BLENDFACTOR_ZERO:
VEC4_COPY(dest[0], zero); /* R */
VEC4_COPY(dest[1], zero); /* G */
VEC4_COPY(dest[2], zero); /* B */
break;
case PIPE_BLENDFACTOR_SRC1_COLOR:
case PIPE_BLENDFACTOR_SRC1_ALPHA:
/* XXX what are these? */
assert(0);
break;
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
case PIPE_BLENDFACTOR_ZERO:
VEC4_COPY(dest[0], zero); /* R */
VEC4_COPY(dest[1], zero); /* G */
VEC4_COPY(dest[2], zero); /* B */
break;
case PIPE_BLENDFACTOR_SRC1_COLOR:
case PIPE_BLENDFACTOR_SRC1_ALPHA:
/* XXX what are these? */
assert(0);
break;
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
{
float inv_comp[4];
VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
@ -563,7 +565,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
}
break;
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
{
float one_minus_alpha[QUAD_SIZE];
VEC4_SUB(one_minus_alpha, one, quadColor[3]);
@ -572,7 +574,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
}
break;
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
{
float inv_comp[4];
VEC4_SUB(inv_comp, one, dest[3]); /* A */
@ -581,7 +583,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
}
break;
case PIPE_BLENDFACTOR_INV_DST_COLOR:
case PIPE_BLENDFACTOR_INV_DST_COLOR:
{
float inv_comp[4];
VEC4_SUB(inv_comp, one, dest[0]); /* R */
@ -592,7 +594,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], dest[2], inv_comp); /* B */
}
break;
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
{
float inv_comp[4];
/* R */
@ -606,7 +608,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], dest[2], inv_comp);
}
break;
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
{
float inv_comp[4];
VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[3]);
@ -615,138 +617,154 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
VEC4_MUL(dest[2], dest[2], inv_comp);
}
break;
case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
/* XXX what are these? */
assert(0);
break;
default:
assert(0);
}
case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
/* XXX what are these? */
assert(0);
break;
default:
assert(0);
}
/*
* Compute dest/second term A
*/
switch (llvmpipe->blend->alpha_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
/* dest = dest * 1 NO-OP, leave dest as-is */
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_SRC_ALPHA:
VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */
break;
case PIPE_BLENDFACTOR_DST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_DST_ALPHA:
VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
break;
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
assert(0); /* illegal */
break;
case PIPE_BLENDFACTOR_CONST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_CONST_ALPHA:
/*
* Compute dest/second term A
*/
switch (llvmpipe->blend->alpha_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
/* dest = dest * 1 NO-OP, leave dest as-is */
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_SRC_ALPHA:
VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */
break;
case PIPE_BLENDFACTOR_DST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_DST_ALPHA:
VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
break;
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
assert(0); /* illegal */
break;
case PIPE_BLENDFACTOR_CONST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_CONST_ALPHA:
{
float comp[4];
VEC4_SCALAR(comp, llvmpipe->blend_color.color[3]); /* A */
VEC4_MUL(dest[3], dest[3], comp); /* A */
}
break;
case PIPE_BLENDFACTOR_ZERO:
VEC4_COPY(dest[3], zero); /* A */
break;
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
case PIPE_BLENDFACTOR_ZERO:
VEC4_COPY(dest[3], zero); /* A */
break;
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
{
float one_minus_alpha[QUAD_SIZE];
VEC4_SUB(one_minus_alpha, one, quadColor[3]);
VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
}
break;
case PIPE_BLENDFACTOR_INV_DST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
case PIPE_BLENDFACTOR_INV_DST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
{
float inv_comp[4];
VEC4_SUB(inv_comp, one, dest[3]); /* A */
VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */
}
break;
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
/* fall-through */
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
{
float inv_comp[4];
VEC4_SCALAR(inv_comp, 1.0f - llvmpipe->blend_color.color[3]);
VEC4_MUL(dest[3], dest[3], inv_comp);
}
break;
default:
assert(0);
}
default:
assert(0);
}
/*
* Combine RGB terms
*/
switch (llvmpipe->blend->rgb_func) {
case PIPE_BLEND_ADD:
VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
break;
case PIPE_BLEND_SUBTRACT:
VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */
VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */
VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */
break;
case PIPE_BLEND_REVERSE_SUBTRACT:
VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */
VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */
VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */
break;
case PIPE_BLEND_MIN:
VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */
VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */
VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */
break;
case PIPE_BLEND_MAX:
VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */
VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */
VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
break;
default:
assert(0);
}
/*
* Combine RGB terms
*/
switch (llvmpipe->blend->rgb_func) {
case PIPE_BLEND_ADD:
VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
break;
case PIPE_BLEND_SUBTRACT:
VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */
VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */
VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */
break;
case PIPE_BLEND_REVERSE_SUBTRACT:
VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */
VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */
VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */
break;
case PIPE_BLEND_MIN:
VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */
VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */
VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */
break;
case PIPE_BLEND_MAX:
VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */
VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */
VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
break;
default:
assert(0);
}
/*
* Combine A terms
*/
switch (llvmpipe->blend->alpha_func) {
case PIPE_BLEND_ADD:
VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
break;
case PIPE_BLEND_SUBTRACT:
VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */
break;
case PIPE_BLEND_REVERSE_SUBTRACT:
VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */
break;
case PIPE_BLEND_MIN:
VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */
break;
case PIPE_BLEND_MAX:
VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
break;
default:
assert(0);
/*
* Combine A terms
*/
switch (llvmpipe->blend->alpha_func) {
case PIPE_BLEND_ADD:
VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
break;
case PIPE_BLEND_SUBTRACT:
VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */
break;
case PIPE_BLEND_REVERSE_SUBTRACT:
VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */
break;
case PIPE_BLEND_MIN:
VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */
break;
case PIPE_BLEND_MAX:
VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
break;
default:
assert(0);
}
}
} /* cbuf loop */
}
/**
 * Blend stage entry point for a batch of quads.
 *
 * Runs the logic-op path when logicop_enable is set, otherwise the regular
 * blend path when blend_enable is set, otherwise leaves the colors as they
 * are.  In every case the whole batch is then forwarded exactly once to
 * the next stage.
 *
 * \param qs     this quad stage
 * \param quads  array of quads to blend (colors are modified in place)
 * \param nr     number of entries in quads[]
 */
static void
blend_quad(struct quad_stage *qs,
           struct quad_header *quads[],
           unsigned nr)
{
   struct llvmpipe_context *llvmpipe = qs->llvmpipe;

   if (llvmpipe->blend->logicop_enable) {
      logicop_quad(qs, quads, nr);
   }
   else if (llvmpipe->blend->blend_enable) {
      blend_quads(qs, quads, nr );
   }

   /* pass blended quads to next stage */
   qs->next->run(qs->next, quads, nr);
}

View file

@ -84,12 +84,23 @@ colormask_quad(struct quad_stage *qs, struct quad_header *quad)
if (!(llvmpipe->blend->colormask & PIPE_MASK_A))
COPY_4V(quadColor[3], dest[3]);
}
}
/**
 * Colormask stage entry point for a batch of quads: apply the color mask
 * to each quad in turn, then forward the whole batch once to the next stage.
 *
 * \param qs     this quad stage
 * \param quads  array of quads to process
 * \param nr     number of entries in quads[]
 */
static void
colormask_quads(struct quad_stage *qs, struct quad_header *quads[],
                unsigned nr)
{
   unsigned i;

   for (i = 0; i < nr; i++) {
      colormask_quad(qs, quads[i]);
   }

   /* pass quads to next stage */
   qs->next->run(qs->next, quads, nr);
}
static void colormask_begin(struct quad_stage *qs)
{
qs->next->begin(qs->next);
@ -108,7 +119,7 @@ struct quad_stage *lp_quad_colormask_stage( struct llvmpipe_context *llvmpipe )
stage->llvmpipe = llvmpipe;
stage->begin = colormask_begin;
stage->run = colormask_quad;
stage->run = colormask_quads;
stage->destroy = colormask_destroy;
return stage;

View file

@ -42,33 +42,47 @@
/**
 * Multiply quad's alpha values by the fragment coverage.
 *
 * For every bound color buffer, each of the quad's QUAD_SIZE alpha values
 * is scaled by the matching input.coverage[] entry, which is asserted to
 * lie in [0, 1].  The quad is not forwarded to the next stage here.
 *
 * \param qs    this quad stage
 * \param quad  quad whose output alpha is modified in place
 */
static INLINE void
coverage_quad(struct quad_stage *qs, struct quad_header *quad)
{
   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
   uint cbuf;

   /* loop over colorbuffer outputs */
   for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
      float (*quadColor)[4] = quad->output.color[cbuf];
      unsigned j;
      for (j = 0; j < QUAD_SIZE; j++) {
         assert(quad->input.coverage[j] >= 0.0);
         assert(quad->input.coverage[j] <= 1.0);
         quadColor[3][j] *= quad->input.coverage[j];
      }
   }
}
/* XXX: Incorporate into shader after alpha_test.
 */
/**
 * Coverage stage entry point for a batch of quads.
 *
 * The primitive type is taken from quads[0], so nr must be at least 1.
 * If smoothing is enabled in the rasterizer state for that primitive type
 * (poly/line/point), coverage is applied to every quad exactly once.  The
 * whole batch is then forwarded once to the next stage.
 *
 * \param qs     this quad stage
 * \param quads  array of quads to process
 * \param nr     number of entries in quads[]
 */
static void
coverage_run(struct quad_stage *qs,
             struct quad_header *quads[],
             unsigned nr)
{
   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
   const uint prim = quads[0]->input.prim;
   unsigned i;

   if ((llvmpipe->rasterizer->poly_smooth && prim == QUAD_PRIM_TRI) ||
       (llvmpipe->rasterizer->line_smooth && prim == QUAD_PRIM_LINE) ||
       (llvmpipe->rasterizer->point_smooth && prim == QUAD_PRIM_POINT)) {
      for (i = 0; i < nr; i++) {
         coverage_quad( qs, quads[i] );
      }
   }

   qs->next->run(qs->next, quads, nr);
}
static void coverage_begin(struct quad_stage *qs)
{
qs->next->begin(qs->next);
@ -87,7 +101,7 @@ struct quad_stage *lp_quad_coverage_stage( struct llvmpipe_context *llvmpipe )
stage->llvmpipe = llvmpipe;
stage->begin = coverage_begin;
stage->run = coverage_quad;
stage->run = coverage_run;
stage->destroy = coverage_destroy;
return stage;

View file

@ -49,7 +49,7 @@
* Try to effectively do that with codegen...
*/
void
boolean
lp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
{
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
@ -193,6 +193,8 @@ lp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
}
quad->inout.mask &= zmask;
if (quad->inout.mask == 0)
return FALSE;
if (llvmpipe->depth_stencil->depth.writemask) {
@ -252,16 +254,25 @@ lp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
assert(0);
}
}
return TRUE;
}
static void
depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
depth_test_quads(struct quad_stage *qs,
struct quad_header *quads[],
unsigned nr)
{
lp_depth_test_quad(qs, quad);
unsigned i, pass = 0;
if (quad->inout.mask)
qs->next->run(qs->next, quad);
for (i = 0; i < nr; i++) {
if (lp_depth_test_quad(qs, quads[i]))
quads[pass++] = quads[i];
}
if (pass)
qs->next->run(qs->next, quads, pass);
}
@ -283,7 +294,7 @@ struct quad_stage *lp_quad_depth_test_stage( struct llvmpipe_context *llvmpipe )
stage->llvmpipe = llvmpipe;
stage->begin = depth_test_begin;
stage->run = depth_test_quad;
stage->run = depth_test_quads;
stage->destroy = depth_test_destroy;
return stage;

View file

@ -43,20 +43,26 @@
static void
earlyz_quad(
struct quad_stage *qs,
struct quad_header *quad )
struct quad_header *quads[],
unsigned nr )
{
const float fx = (float) quad->input.x0;
const float fy = (float) quad->input.y0;
const float dzdx = quad->posCoef->dadx[2];
const float dzdy = quad->posCoef->dady[2];
const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
const float a0z = quads[0]->posCoef->a0[2];
const float dzdx = quads[0]->posCoef->dadx[2];
const float dzdy = quads[0]->posCoef->dady[2];
unsigned i;
quad->output.depth[0] = z0;
quad->output.depth[1] = z0 + dzdx;
quad->output.depth[2] = z0 + dzdy;
quad->output.depth[3] = z0 + dzdx + dzdy;
for (i = 0; i < nr; i++) {
const float fx = (float) quads[i]->input.x0;
const float fy = (float) quads[i]->input.y0;
const float z0 = a0z + dzdx * fx + dzdy * fy;
qs->next->run( qs->next, quad );
quads[i]->output.depth[0] = z0;
quads[i]->output.depth[1] = z0 + dzdx;
quads[i]->output.depth[2] = z0 + dzdy;
quads[i]->output.depth[3] = z0 + dzdx + dzdy;
}
qs->next->run( qs->next, quads, nr );
}
static void

View file

@ -68,21 +68,18 @@ quad_shade_stage(struct quad_stage *qs)
/**
* Execute fragment shader for the four fragments in the quad.
*/
static void
static boolean
shade_quad(struct quad_stage *qs, struct quad_header *quad)
{
struct quad_shade_stage *qss = quad_shade_stage( qs );
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
struct tgsi_exec_machine *machine = qss->machine;
boolean z_written;
/* Consts do not require 16 byte alignment. */
machine->Consts = llvmpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
machine->InterpCoefs = quad->coef;
/* run shader */
quad->inout.mask &= llvmpipe->fs->run( llvmpipe->fs, machine, quad );
if (quad->inout.mask == 0)
return FALSE;
/* store outputs */
z_written = FALSE;
@ -129,12 +126,35 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
quad->output.depth[3] = z0 + dzdx + dzdy;
}
/* shader may cull fragments */
if (quad->inout.mask) {
qs->next->run( qs->next, quad );
}
return TRUE;
}
/**
 * Run the fragment shader over a batch of quads.
 *
 * The shader machine's constants and interpolation coefficients are set
 * once per batch; the coefficients come from the first quad.  Quads for
 * which shade_quad() returns TRUE are packed to the front of the array,
 * keeping their order, and passed to the next stage.  Nothing is
 * forwarded when no quad survives.
 */
static void
shade_quads(struct quad_stage *qs,
            struct quad_header *quads[],
            unsigned nr)
{
   struct quad_shade_stage *shade_stage = quad_shade_stage( qs );
   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
   struct tgsi_exec_machine *machine = shade_stage->machine;
   unsigned kept = 0;
   unsigned idx;

   machine->Consts = llvmpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
   machine->InterpCoefs = quads[0]->coef;

   for (idx = 0; idx < nr; idx++) {
      struct quad_header *quad = quads[idx];

      /* drop quads whose fragments were all culled by the shader */
      if (!shade_quad(qs, quad))
         continue;

      quads[kept] = quad;
      kept++;
   }

   if (kept == 0)
      return;

   qs->next->run(qs->next, quads, kept);
}
/**
* Per-primitive (or per-begin?) setup
@ -174,7 +194,7 @@ lp_quad_shade_stage( struct llvmpipe_context *llvmpipe )
qss->stage.llvmpipe = llvmpipe;
qss->stage.begin = shade_begin;
qss->stage.run = shade_quad;
qss->stage.run = shade_quads;
qss->stage.destroy = shade_destroy;
qss->machine = tgsi_exec_machine_create();

View file

@ -50,13 +50,15 @@ static unsigned count_bits( unsigned val )
}
static void
occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad)
occlusion_count_quads(struct quad_stage *qs, struct quad_header *quads[], unsigned nr)
{
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
unsigned i;
llvmpipe->occlusion_count += count_bits(quad->inout.mask);
for (i = 0; i < nr; i++)
llvmpipe->occlusion_count += count_bits(quads[i]->inout.mask);
qs->next->run(qs->next, quad);
qs->next->run(qs->next, quads, nr);
}
@ -78,7 +80,7 @@ struct quad_stage *lp_quad_occlusion_stage( struct llvmpipe_context *llvmpipe )
stage->llvmpipe = llvmpipe;
stage->begin = occlusion_begin;
stage->run = occlusion_count_quad;
stage->run = occlusion_count_quads;
stage->destroy = occlusion_destroy;
return stage;

View file

@ -38,11 +38,8 @@
* taking mask into account.
*/
static void
output_quad(struct quad_stage *qs, struct quad_header *quad)
output_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr)
{
/* in-tile pos: */
const int itx = quad->input.x0 % TILE_SIZE;
const int ity = quad->input.y0 % TILE_SIZE;
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
uint cbuf;
@ -51,25 +48,35 @@ output_quad(struct quad_stage *qs, struct quad_header *quad)
for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
struct llvmpipe_cached_tile *tile
= lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf],
quad->input.x0, quad->input.y0);
float (*quadColor)[4] = quad->output.color[cbuf];
int i, j;
quads[0]->input.x0,
quads[0]->input.y0);
int i, j, q;
/* get/swizzle dest colors */
for (j = 0; j < QUAD_SIZE; j++) {
if (quad->inout.mask & (1 << j)) {
int x = itx + (j & 1);
int y = ity + (j >> 1);
for (i = 0; i < 4; i++) { /* loop over color chans */
tile->data.color[y][x][i] = quadColor[i][j];
}
if (0) {
debug_printf("lp write pixel %d,%d: %g, %g, %g\n",
quad->input.x0 + x,
quad->input.y0 + y,
quadColor[0][j],
quadColor[1][j],
quadColor[2][j]);
for (q = 0; q < nr; q++) {
struct quad_header *quad = quads[q];
float (*quadColor)[4] = quad->output.color[cbuf];
/* in-tile pos: */
const int itx = quad->input.x0 % TILE_SIZE;
const int ity = quad->input.y0 % TILE_SIZE;
for (j = 0; j < QUAD_SIZE; j++) {
if (quad->inout.mask & (1 << j)) {
int x = itx + (j & 1);
int y = ity + (j >> 1);
for (i = 0; i < 4; i++) { /* loop over color chans */
tile->data.color[y][x][i] = quadColor[i][j];
}
if (0) {
debug_printf("lp write pixel %d,%d: %g, %g, %g\n",
quad->input.x0 + x,
quad->input.y0 + y,
quadColor[0][j],
quadColor[1][j],
quadColor[2][j]);
}
}
}
}

View file

@ -55,50 +55,52 @@ void
lp_build_quad_pipeline(struct llvmpipe_context *lp)
{
boolean early_depth_test =
lp->depth_stencil->depth.enabled &&
lp->framebuffer.zsbuf &&
!lp->depth_stencil->alpha.enabled &&
!lp->fs->info.uses_kill &&
!lp->fs->info.writes_z;
lp->depth_stencil->depth.enabled &&
lp->framebuffer.zsbuf &&
!lp->depth_stencil->alpha.enabled &&
!lp->fs->info.uses_kill &&
!lp->fs->info.writes_z;
/* build up the pipeline in reverse order... */
lp->quad.first = lp->quad.output;
if (lp->blend->colormask != 0xf) {
lp_push_quad_first( lp, lp->quad.colormask );
}
/* Color combine
*/
lp->quad.first = lp->quad.output;
if (lp->blend->blend_enable ||
lp->blend->logicop_enable) {
lp_push_quad_first( lp, lp->quad.blend );
}
if (lp->blend->colormask != 0xf) {
lp_push_quad_first( lp, lp->quad.colormask );
}
if (lp->active_query_count) {
lp_push_quad_first( lp, lp->quad.occlusion );
}
if (lp->blend->blend_enable ||
lp->blend->logicop_enable) {
lp_push_quad_first( lp, lp->quad.blend );
}
if (lp->rasterizer->poly_smooth ||
lp->rasterizer->line_smooth ||
lp->rasterizer->point_smooth) {
lp_push_quad_first( lp, lp->quad.coverage );
}
if (lp->rasterizer->poly_smooth ||
lp->rasterizer->line_smooth ||
lp->rasterizer->point_smooth) {
lp_push_quad_first( lp, lp->quad.coverage );
}
if (!early_depth_test) {
lp_build_depth_stencil( lp );
}
/* Shade/Depth/Stencil/Alpha
*/
if (lp->active_query_count) {
lp_push_quad_first( lp, lp->quad.occlusion );
}
if (lp->depth_stencil->alpha.enabled) {
lp_push_quad_first( lp, lp->quad.alpha_test );
}
if (!early_depth_test) {
lp_build_depth_stencil( lp );
}
/* XXX always enable shader? */
if (1) {
lp_push_quad_first( lp, lp->quad.shade );
}
if (lp->depth_stencil->alpha.enabled) {
lp_push_quad_first( lp, lp->quad.alpha_test );
}
if (early_depth_test) {
lp_build_depth_stencil( lp );
lp_push_quad_first( lp, lp->quad.earlyz );
}
lp_push_quad_first( lp, lp->quad.shade );
if (early_depth_test) {
lp_build_depth_stencil( lp );
lp_push_quad_first( lp, lp->quad.earlyz );
}
}

View file

@ -49,7 +49,7 @@ struct quad_stage {
void (*begin)(struct quad_stage *qs);
/** the stage action */
void (*run)(struct quad_stage *qs, struct quad_header *quad);
void (*run)(struct quad_stage *qs, struct quad_header *quad[], unsigned nr);
void (*destroy)(struct quad_stage *qs);
};
@ -69,6 +69,6 @@ struct quad_stage *lp_quad_output_stage( struct llvmpipe_context *llvmpipe );
void lp_build_quad_pipeline(struct llvmpipe_context *lp);
void lp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad);
boolean lp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad);
#endif /* LP_QUAD_PIPE_H */

View file

@ -198,7 +198,8 @@ apply_stencil_op(ubyte stencilVals[QUAD_SIZE],
* depth testing.
*/
static void
stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
stencil_test_quad(struct quad_stage *qs, struct quad_header *quads[],
unsigned nr)
{
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
struct pipe_surface *ps = llvmpipe->framebuffer.zsbuf;
@ -206,9 +207,12 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
ubyte ref, wrtMask, valMask;
ubyte stencilVals[QUAD_SIZE];
struct llvmpipe_cached_tile *tile
= lp_get_cached_tile(llvmpipe->zsbuf_cache, quad->input.x0, quad->input.y0);
uint j;
uint face = quad->input.facing;
= lp_get_cached_tile(llvmpipe->zsbuf_cache,
quads[0]->input.x0,
quads[0]->input.y0);
uint face = quads[0]->input.facing;
uint pass = 0;
uint j, q;
if (!llvmpipe->depth_stencil->stencil[1].enabled) {
/* single-sided stencil test, use front (face=0) state */
@ -227,103 +231,110 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
assert(ps); /* shouldn't get here if there's no stencil buffer */
/* get stencil values from cached tile */
switch (ps->format) {
case PIPE_FORMAT_S8Z24_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
stencilVals[j] = tile->data.depth32[y][x] >> 24;
}
break;
case PIPE_FORMAT_Z24S8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
stencilVals[j] = tile->data.depth32[y][x] & 0xff;
}
break;
case PIPE_FORMAT_S8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
stencilVals[j] = tile->data.stencil8[y][x];
}
break;
default:
assert(0);
}
for (q = 0; q < nr; q++) {
struct quad_header *quad = quads[q];
/* do the stencil test first */
{
unsigned passMask, failMask;
passMask = do_stencil_test(stencilVals, func, ref, valMask);
failMask = quad->inout.mask & ~passMask;
quad->inout.mask &= passMask;
if (failOp != PIPE_STENCIL_OP_KEEP) {
apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask);
}
}
if (quad->inout.mask) {
/* now the pixels that passed the stencil test are depth tested */
if (llvmpipe->depth_stencil->depth.enabled) {
const unsigned origMask = quad->inout.mask;
lp_depth_test_quad(qs, quad); /* quad->mask is updated */
/* update stencil buffer values according to z pass/fail result */
if (zFailOp != PIPE_STENCIL_OP_KEEP) {
const unsigned failMask = origMask & ~quad->inout.mask;
apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask);
/* get stencil values from cached tile */
switch (ps->format) {
case PIPE_FORMAT_S8Z24_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
stencilVals[j] = tile->data.depth32[y][x] >> 24;
}
break;
case PIPE_FORMAT_Z24S8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
stencilVals[j] = tile->data.depth32[y][x] & 0xff;
}
break;
case PIPE_FORMAT_S8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
stencilVals[j] = tile->data.stencil8[y][x];
}
break;
default:
assert(0);
}
if (zPassOp != PIPE_STENCIL_OP_KEEP) {
const unsigned passMask = origMask & quad->inout.mask;
apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask);
/* do the stencil test first */
{
unsigned passMask, failMask;
passMask = do_stencil_test(stencilVals, func, ref, valMask);
failMask = quad->inout.mask & ~passMask;
quad->inout.mask &= passMask;
if (failOp != PIPE_STENCIL_OP_KEEP) {
apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask);
}
}
else {
/* no depth test, apply Zpass operator to stencil buffer values */
apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask);
if (quad->inout.mask) {
/* now the pixels that passed the stencil test are depth tested */
if (llvmpipe->depth_stencil->depth.enabled) {
const unsigned origMask = quad->inout.mask;
lp_depth_test_quad(qs, quad); /* quad->mask is updated */
/* update stencil buffer values according to z pass/fail result */
if (zFailOp != PIPE_STENCIL_OP_KEEP) {
const unsigned failMask = origMask & ~quad->inout.mask;
apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask);
}
if (zPassOp != PIPE_STENCIL_OP_KEEP) {
const unsigned passMask = origMask & quad->inout.mask;
apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask);
}
}
else {
/* no depth test, apply Zpass operator to stencil buffer values */
apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask);
}
}
/* put new stencil values into cached tile */
switch (ps->format) {
case PIPE_FORMAT_S8Z24_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
uint s8z24 = tile->data.depth32[y][x];
s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff);
tile->data.depth32[y][x] = s8z24;
}
break;
case PIPE_FORMAT_Z24S8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
uint z24s8 = tile->data.depth32[y][x];
z24s8 = (z24s8 & 0xffffff00) | stencilVals[j];
tile->data.depth32[y][x] = z24s8;
}
break;
case PIPE_FORMAT_S8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
tile->data.stencil8[y][x] = stencilVals[j];
}
break;
default:
assert(0);
}
/* Keep this quad for the next stage only if some fragment survived.
 * Store the quad pointer itself, not the loop index q.
 */
if (quad->inout.mask)
   quads[pass++] = quad;
}
/* put new stencil values into cached tile */
switch (ps->format) {
case PIPE_FORMAT_S8Z24_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
uint s8z24 = tile->data.depth32[y][x];
s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff);
tile->data.depth32[y][x] = s8z24;
}
break;
case PIPE_FORMAT_Z24S8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
uint z24s8 = tile->data.depth32[y][x];
z24s8 = (z24s8 & 0xffffff00) | stencilVals[j];
tile->data.depth32[y][x] = z24s8;
}
break;
case PIPE_FORMAT_S8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
tile->data.stencil8[y][x] = stencilVals[j];
}
break;
default:
assert(0);
}
if (quad->inout.mask)
qs->next->run(qs->next, quad);
if (pass)
qs->next->run(qs->next, quads, pass);
}

View file

@ -14,40 +14,46 @@
* Apply polygon stipple to quads produced by triangle rasterization
*/
static void
stipple_quad(struct quad_stage *qs, struct quad_header *quad)
stipple_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr)
{
static const uint bit31 = 1 << 31;
static const uint bit30 = 1 << 30;
unsigned pass = nr;
if (quad->input.prim == QUAD_PRIM_TRI) {
if (quads[0]->input.prim == QUAD_PRIM_TRI) {
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
/* need to invert Y to index into OpenGL's stipple pattern */
const int col0 = quad->input.x0 % 32;
const int y0 = quad->input.y0;
const int y1 = y0 + 1;
const uint stipple0 = llvmpipe->poly_stipple.stipple[y0 % 32];
const uint stipple1 = llvmpipe->poly_stipple.stipple[y1 % 32];
unsigned q;
/* turn off quad mask bits that fail the stipple test */
if ((stipple0 & (bit31 >> col0)) == 0)
quad->inout.mask &= ~MASK_TOP_LEFT;
pass = 0;
if ((stipple0 & (bit30 >> col0)) == 0)
quad->inout.mask &= ~MASK_TOP_RIGHT;
for (q = 0; q < nr; q++) {
struct quad_header *quad = quads[q];
if ((stipple1 & (bit31 >> col0)) == 0)
quad->inout.mask &= ~MASK_BOTTOM_LEFT;
const int col0 = quad->input.x0 % 32;
const int y0 = quad->input.y0;
const int y1 = y0 + 1;
const uint stipple0 = llvmpipe->poly_stipple.stipple[y0 % 32];
const uint stipple1 = llvmpipe->poly_stipple.stipple[y1 % 32];
if ((stipple1 & (bit30 >> col0)) == 0)
quad->inout.mask &= ~MASK_BOTTOM_RIGHT;
/* turn off quad mask bits that fail the stipple test */
if ((stipple0 & (bit31 >> col0)) == 0)
quad->inout.mask &= ~MASK_TOP_LEFT;
if (!quad->inout.mask) {
/* all fragments failed stipple test, end of quad pipeline */
return;
if ((stipple0 & (bit30 >> col0)) == 0)
quad->inout.mask &= ~MASK_TOP_RIGHT;
if ((stipple1 & (bit31 >> col0)) == 0)
quad->inout.mask &= ~MASK_BOTTOM_LEFT;
if ((stipple1 & (bit30 >> col0)) == 0)
quad->inout.mask &= ~MASK_BOTTOM_RIGHT;
if (quad->inout.mask)
quads[pass++] = quad;
}
}
qs->next->run(qs->next, quad);
qs->next->run(qs->next, quads, pass);
}

View file

@ -172,7 +172,7 @@ clip_emit_quad( struct setup_context *setup, struct quad_header *quad )
if (quad->inout.mask) {
struct llvmpipe_context *lp = setup->llvmpipe;
lp->quad.first->run( lp->quad.first, quad );
lp->quad.first->run( lp->quad.first, &quad, 1 );
}
}
@ -193,7 +193,7 @@ emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread )
if (mask & 4) setup->numFragsEmitted++;
if (mask & 8) setup->numFragsEmitted++;
#endif
lp->quad.first->run( lp->quad.first, quad );
lp->quad.first->run( lp->quad.first, &quad, 1 );
#if DEBUG_FRAGS
mask = quad->inout.mask;
if (mask & 1) setup->numFragsWritten++;