cell: Initial code-gen for alpha / stencil / depth testing

Alpha test is currently broken because all per-fragment testing occurs
before alpha is calculated.

Stencil test is currently broken because the Z-clear code asserts if
there is a stencil buffer.
This commit is contained in:
Ian Romanick 2008-03-17 15:45:52 -07:00
parent 0c715de39f
commit 1936e4bdfd
15 changed files with 1409 additions and 170 deletions

View file

@ -104,6 +104,16 @@
/**
* Payload of the CELL_CMD_STATE_DEPTH_STENCIL command.
*
* Instead of sending the pipe-level depth/stencil/alpha state, the PPU sends
* the location of the generated per-fragment test code.  The SPU fetches
* ROUNDUP16(size) bytes starting at \c base into its local store and asserts
* that \c base is 16-byte aligned.
*/
struct cell_command_depth_stencil_alpha_test {
uint64_t base; /**< Effective address of code start. */
unsigned size; /**< Size in bytes of test code. */
unsigned read_depth; /**< Flag: should depth be read? */
unsigned read_stencil; /**< Flag: should stencil be read? */
};
/**
* Tell SPUs about the framebuffer size, location
*/

View file

@ -27,6 +27,7 @@ SOURCES = \
cell_flush.c \
cell_state_derived.c \
cell_state_emit.c \
cell_state_per_fragment.c \
cell_state_shader.c \
cell_pipe_state.c \
cell_screen.c \

View file

@ -57,16 +57,37 @@ struct cell_fragment_shader_state
};
/**
* Cell-specific blend state: the generic pipe blend state plus the SPE code
* generated for it.
*/
struct cell_blend_state {
/* Must stay the first member: cell_create_blend_state() memcpy()s the
* caller's pipe_blend_state over the start of this structure.
*/
struct pipe_blend_state base;
/**
* Generated code to perform alpha blending
*/
struct spe_function code;
};
/**
* Cell-specific depth / stencil / alpha state: the generic pipe state plus
* the SPE code generated for it.
*/
struct cell_depth_stencil_alpha_state {
/* Must stay the first member: cell_create_depth_stencil_alpha_state()
* memcpy()s the caller's pipe state over the start of this structure.
*/
struct pipe_depth_stencil_alpha_state base;
/**
* Generated code to perform alpha, stencil, and depth testing on the SPE
*/
struct spe_function code;
};
struct cell_context
{
struct pipe_context pipe;
struct cell_winsys *winsys;
const struct pipe_blend_state *blend;
const struct cell_blend_state *blend;
const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
uint num_samplers;
const struct pipe_depth_stencil_alpha_state *depth_stencil;
const struct cell_depth_stencil_alpha_state *depth_stencil;
const struct pipe_rasterizer_state *rasterizer;
const struct cell_vertex_shader_state *vs;
const struct cell_fragment_shader_state *fs;

View file

@ -36,6 +36,7 @@
#include "cell_context.h"
#include "cell_state.h"
#include "cell_texture.h"
#include "cell_state_per_fragment.h"
@ -43,7 +44,12 @@ static void *
cell_create_blend_state(struct pipe_context *pipe,
const struct pipe_blend_state *blend)
{
return mem_dup(blend, sizeof(*blend));
struct cell_blend_state *cb = MALLOC(sizeof(struct cell_blend_state));
(void) memcpy(cb, blend, sizeof(*blend));
cb->code.store = NULL;
return cb;
}
@ -54,7 +60,7 @@ cell_bind_blend_state(struct pipe_context *pipe, void *blend)
draw_flush(cell->draw);
cell->blend = (const struct pipe_blend_state *)blend;
cell->blend = (const struct cell_blend_state *)blend;
cell->dirty |= CELL_NEW_BLEND;
}
@ -63,7 +69,10 @@ cell_bind_blend_state(struct pipe_context *pipe, void *blend)
static void
cell_delete_blend_state(struct pipe_context *pipe, void *blend)
{
FREE(blend);
struct cell_blend_state *cb = (struct cell_blend_state *) blend;
spe_release_func(& cb->code);
FREE(cb);
}
@ -87,7 +96,13 @@ static void *
cell_create_depth_stencil_alpha_state(struct pipe_context *pipe,
const struct pipe_depth_stencil_alpha_state *depth_stencil)
{
return mem_dup(depth_stencil, sizeof(*depth_stencil));
struct cell_depth_stencil_alpha_state *cdsa =
MALLOC(sizeof(struct cell_depth_stencil_alpha_state));
(void) memcpy(cdsa, depth_stencil, sizeof(*depth_stencil));
cdsa->code.store = NULL;
return cdsa;
}
@ -96,12 +111,16 @@ cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe,
void *depth_stencil)
{
struct cell_context *cell = cell_context(pipe);
struct cell_depth_stencil_alpha_state *cdsa =
(struct cell_depth_stencil_alpha_state *) depth_stencil;
draw_flush(cell->draw);
cell->depth_stencil
= (const struct pipe_depth_stencil_alpha_state *) depth_stencil;
if (cdsa->code.store == NULL) {
cell_generate_depth_stencil_test(cdsa);
}
cell->depth_stencil = cdsa;
cell->dirty |= CELL_NEW_DEPTH_STENCIL;
}
@ -109,7 +128,11 @@ cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe,
static void
cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth)
{
FREE(depth);
struct cell_depth_stencil_alpha_state *cdsa =
(struct cell_depth_stencil_alpha_state *) depth;
spe_release_func(& cdsa->code);
FREE(cdsa);
}

View file

@ -71,9 +71,27 @@ cell_emit_state(struct cell_context *cell)
}
if (cell->dirty & CELL_NEW_DEPTH_STENCIL) {
emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL,
cell->depth_stencil,
sizeof(struct pipe_depth_stencil_alpha_state));
struct cell_command_depth_stencil_alpha_test dsat;
dsat.base = (intptr_t) cell->depth_stencil->code.store;
dsat.size = (char *) cell->depth_stencil->code.csr
- (char *) cell->depth_stencil->code.store;
dsat.read_depth = TRUE;
dsat.read_stencil = FALSE;
{
uint32_t *p = cell->depth_stencil->code.store;
printf("\t.text\n");
for (/* empty */; p < cell->depth_stencil->code.csr; p++) {
printf("\t.long\t0x%04x\n", *p);
}
fflush(stdout);
}
emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, &dsat,
sizeof(dsat));
}
if (cell->dirty & CELL_NEW_SAMPLER) {

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,35 @@
/*
* (C) Copyright IBM Corporation 2008
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CELL_STATE_PER_FRAGMENT_H
#define CELL_STATE_PER_FRAGMENT_H

/* Forward declarations so that this header can be included on its own.
 * Without them the struct tags below would first be declared inside the
 * parameter lists, giving them prototype scope and making the prototypes
 * incompatible with the real structure types.
 */
struct cell_blend_state;
struct cell_depth_stencil_alpha_state;
struct pipe_blend_color;

/**
 * Generate the per-fragment test code for a depth / stencil / alpha state
 * object.
 *
 * The state-bind path calls this when \c cdsa->code.store is still NULL.
 *
 * \param cdsa  State object whose \c code member is to be generated.
 */
extern void
cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa);

/**
 * Generate alpha blending code for a blend state object.
 *
 * NOTE(review): the implementation is not visible from this header;
 * presumably the result is stored in \c cb->code - confirm.
 *
 * \param cb           Blend state object.
 * \param blend_color  Blend color used by the blend state.
 */
extern void
cell_generate_alpha_blend(struct cell_blend_state *cb,
                          const struct pipe_blend_color *blend_color);

#endif /* CELL_STATE_PER_FRAGMENT_H */

View file

@ -19,6 +19,7 @@ SOURCES = \
spu_main.c \
spu_blend.c \
spu_dcache.c \
spu_per_fragment_op.c \
spu_render.c \
spu_texture.c \
spu_tile.c \

View file

@ -58,6 +58,9 @@ struct spu_vs_context draw;
static unsigned char attribute_fetch_code_buffer[136 * PIPE_ATTRIB_MAX]
ALIGN16_ATTRIB;
static unsigned char depth_stencil_code_buffer[4 * 64]
ALIGN16_ATTRIB;
/**
* Tell the PPU that this SPU has finished copying a buffer to
* local store and that it may be reused by the PPU.
@ -248,14 +251,26 @@ cmd_state_blend(const struct pipe_blend_state *state)
/*
 * Handle CELL_CMD_STATE_DEPTH_STENCIL: fetch the per-fragment test code
 * described by the command into local store and make it the current
 * fragment test function.
 */
static void
cmd_state_depth_stencil(const struct pipe_depth_stencil_alpha_state *state)
cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *state)
{
if (Debug)
printf("SPU %u: DEPTH_STENCIL: ztest %d\n",
spu.init.id,
state->depth.enabled);
state->read_depth);
memcpy(&spu.depth_stencil, state, sizeof(*state));
ASSERT_ALIGN16(state->base);
/* NOTE(review): depth_stencil_code_buffer is declared as 4 * 64 bytes, but
 * nothing here checks that ROUNDUP16(state->size) fits in it before the
 * DMA.  Larger generated code would overrun the buffer - confirm the
 * generator bounds the code size, or add a check here.
 */
mfc_get(depth_stencil_code_buffer,
(unsigned int) state->base, /* src */
ROUNDUP16(state->size),
TAG_BATCH_BUFFER,
0, /* tid */
0 /* rid */);
/* The code must have fully arrived before it can be called. */
wait_on_mask(1 << TAG_BATCH_BUFFER);
/* The fetched bytes are executed directly as the fragment test function. */
spu.frag_test = (frag_test_func) depth_stencil_code_buffer;
spu.read_depth = state->read_depth;
spu.read_stencil = state->read_stencil;
}
@ -415,9 +430,9 @@ cmd_batch(uint opcode)
pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8);
break;
case CELL_CMD_STATE_DEPTH_STENCIL:
cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state *)
cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test *)
&buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct pipe_depth_stencil_alpha_state)) / 8);
pos += (1 + ROUNDUP8(sizeof(struct cell_command_depth_stencil_alpha_test)) / 8);
break;
case CELL_CMD_STATE_SAMPLER:
cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]);

View file

@ -56,6 +56,17 @@ typedef union {
#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */
/**
* Values returned by the fragment test function (see frag_test_func).
*
* spu_do_depth_stencil() returns \c mask to its caller and passes \c depth
* and \c stencil to write_ds_quad() for storing back into the Z tile.
*/
struct spu_frag_test_results {
qword mask; /**< Fragment mask after testing. */
qword depth; /**< Depth values to store for the quad. */
qword stencil; /**< Stencil values to store for the quad. */
};
/**
* Signature of the per-fragment test routine.
*
* spu.frag_test is pointed at code fetched from main memory by
* cmd_state_depth_stencil(); spu_do_depth_stencil() calls it once per quad
* with the incoming fragment mask, the depth / stencil values read from the
* tile, and the fragment's depth, alpha and facing values.
*/
typedef struct spu_frag_test_results (*frag_test_func)(qword frag_mask,
qword pixel_depth, qword pixel_stencil, qword frag_depth,
qword frag_alpha, qword facing);
struct spu_framebuffer {
void *color_start; /**< addr of color surface in main memory */
void *depth_start; /**< addr of depth surface in main memory */
@ -79,8 +90,9 @@ struct spu_global
struct cell_init_info init;
struct spu_framebuffer fb;
struct pipe_blend_state blend_stencil;
struct pipe_depth_stencil_alpha_state depth_stencil;
boolean read_depth;
boolean read_stencil;
frag_test_func frag_test;
struct pipe_blend_state blend;
struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
struct cell_command_texture texture;

View file

@ -0,0 +1,191 @@
/*
* (C) Copyright IBM Corporation 2008
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* \file spu_per_fragment_op.c
* SPU implementation of various per-fragment operations.
*
* \author Ian Romanick <idr@us.ibm.com>
*/
#include "pipe/p_format.h"
#include "spu_main.h"
#include "spu_per_fragment_op.h"
#define ZERO 0x80
/**
 * Read the depth and stencil values of one 2x2 quad from a tile.
 *
 * \param buffer        Tile holding the depth / stencil data.
 * \param x             X coordinate of the quad; \c x / 2 selects the quad
 *                      column within \c buffer.
 * \param y             Y coordinate of the quad; \c y / 2 selects the quad
 *                      row within \c buffer.
 * \param depth_format  Layout of the data stored in \c buffer.
 * \param depth         Receives the four depth values, one per 32-bit word,
 *                      zero-extended.
 * \param stencil       Receives the four stencil values, one per 32-bit
 *                      word, zero-extended.  Always zero for formats that
 *                      carry no stencil bits.
 */
static void
read_ds_quad(tile_t *buffer, unsigned x, unsigned y,
             enum pipe_format depth_format, qword *depth,
             qword *stencil)
{
   const int ix = x / 2;
   const int iy = y / 2;

   switch (depth_format) {
   case PIPE_FORMAT_Z16_UNORM: {
      /* One quadword holds the 16-bit Z values of two adjacent quads. */
      qword *ptr = (qword *) &buffer->us8[iy][ix / 2];

      /* Expands the first four shorts to four zero-extended words; ZERO
       * control bytes produce 0x00 in the result.
       */
      const qword shuf_vec = (qword) {
         ZERO, ZERO,  0,  1, ZERO, ZERO,  2,  3,
         ZERO, ZERO,  4,  5, ZERO, ZERO,  6,  7
      };

      /* At even X values we want the first 4 shorts, and at odd X values we
       * want the second 4 shorts.  Adding 8 to the byte selectors (and only
       * to them, hence the mask) moves the pattern to the second half.
       */
      qword bias = (qword) spu_splats((unsigned char) ((ix & 0x01) << 3));
      qword bias_mask = si_fsmbi(0x3333);
      qword sv = si_a(shuf_vec, si_and(bias_mask, bias));

      *depth = si_shufb(*ptr, *ptr, sv);
      *stencil = si_il(0);
      break;
   }

   case PIPE_FORMAT_Z32_UNORM: {
      qword *ptr = (qword *) &buffer->ui4[iy][ix];

      *depth = *ptr;
      *stencil = si_il(0);
      break;
   }

   case PIPE_FORMAT_Z24S8_UNORM: {
      qword *ptr = (qword *) &buffer->ui4[iy][ix];

      /* 0x00ffffff in every word: selects the 24 depth bits. */
      qword mask = si_fsmbi(0x7777);

      *depth = si_and(*ptr, mask);

      /* Logical (not arithmetic) right shift so that stencil values with
       * the top bit set are not sign-extended to 0xffffffXX.
       */
      *stencil = si_rotmi(si_andc(*ptr, mask), -24);
      break;
   }

   default:
      assert(0);
      /* Keep the outputs defined when assertions are compiled out. */
      *depth = si_il(0);
      *stencil = si_il(0);
      break;
   }
}
/**
 * Store the depth and stencil values of one 2x2 quad into a tile.
 *
 * \param buffer        Tile receiving the depth / stencil data.
 * \param x             X coordinate of the quad; \c x / 2 selects the quad
 *                      column within \c buffer.
 * \param y             Y coordinate of the quad; \c y / 2 selects the quad
 *                      row within \c buffer.
 * \param depth_format  Layout of the data stored in \c buffer.
 * \param depth         Four depth values, one per 32-bit word.
 * \param stencil       Four stencil values, one per 32-bit word.  Only used
 *                      for \c PIPE_FORMAT_Z24S8_UNORM.
 */
static void
write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
              enum pipe_format depth_format,
              qword depth, qword stencil)
{
   const int ix = x / 2;
   const int iy = y / 2;

   switch (depth_format) {
   case PIPE_FORMAT_Z16_UNORM: {
      /* One quadword holds the 16-bit Z values of two adjacent quads. */
      qword *ptr = (qword *) &buffer->us8[iy][ix / 2];

      /* Pack the low 16 bits of each depth word into one half of the
       * quadword and keep the other half (selectors 16..31 refer to the
       * existing tile contents) unchanged.
       */
      qword sv = ((ix & 0x01) == 0)
         ? (qword) {  2,  3,  6,  7, 10, 11, 14, 15,
                     24, 25, 26, 27, 28, 29, 30, 31 }
         : (qword) { 16, 17, 18, 19, 20, 21, 22, 23,
                      2,  3,  6,  7, 10, 11, 14, 15 };

      *ptr = si_shufb(depth, *ptr, sv);
      break;
   }

   case PIPE_FORMAT_Z32_UNORM: {
      qword *ptr = (qword *) &buffer->ui4[iy][ix];

      *ptr = depth;
      break;
   }

   case PIPE_FORMAT_Z24S8_UNORM: {
      qword *ptr = (qword *) &buffer->ui4[iy][ix];

      /* 0x00ffffff in every word: selects the 24 depth bits. */
      qword mask = si_fsmbi(0x7777);

      /* Move the stencil value into the top byte of each word.  This has to
       * be a real left shift: the rotate-and-mask instructions shift
       * *right* by the negated count, so a positive count of 24 yields
       * nothing but sign fill.
       */
      stencil = si_shli(stencil, 24);

      /* Depth where the mask is set, stencil everywhere else. */
      *ptr = si_selb(stencil, depth, mask);
      break;
   }

   default:
      assert(0);
      break;
   }
}
/**
 * Run the current per-fragment test function on one quad.
 *
 * Reads the quad's depth / stencil values from the Z tile when the current
 * state asks for them, converts the fragment depth to the integer range of
 * the framebuffer's depth format, calls \c spu.frag_test and writes the
 * resulting depth / stencil values back to the tile.
 *
 * \param x           X coordinate of the quad, as expected by read_ds_quad().
 * \param y           Y coordinate of the quad, as expected by read_ds_quad().
 * \param frag_mask   Mask of live fragments in the quad.
 * \param frag_depth  Fragment depth values as floats.
 * \param frag_alpha  Fragment alpha values, passed through to the test.
 * \param facing      Facing value, passed through to the test.
 * \return The mask produced by the test function.
 */
qword
spu_do_depth_stencil(int x, int y,
                     qword frag_mask, qword frag_depth, qword frag_alpha,
                     qword facing)
{
   struct spu_frag_test_results result;

   /* Start from defined values: when neither flag is set nothing is read
    * from the tile, but both values are still handed to the test function.
    */
   qword pixel_depth = si_il(0);
   qword pixel_stencil = si_il(0);

   /* All of this preamble code (everything before the call to frag_test)
    * should be generated on the PPU and uploaded to the SPU.
    */
   if (spu.read_depth || spu.read_stencil) {
      read_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
                   &pixel_depth, &pixel_stencil);
   }

   /* Scale the float depth to the maximum value of the depth format and
    * convert it to an unsigned integer.
    */
   switch (spu.fb.depth_format) {
   case PIPE_FORMAT_Z16_UNORM:
      frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x0000ffffu)));
      frag_depth = si_cfltu(frag_depth, 0);
      break;
   case PIPE_FORMAT_Z32_UNORM:
      frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0xffffffffu)));
      frag_depth = si_cfltu(frag_depth, 0);
      break;
   case PIPE_FORMAT_Z24S8_UNORM:
      frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x00ffffffu)));
      frag_depth = si_cfltu(frag_depth, 0);
      break;
   default:
      assert(0);
      break;
   }

   result = (*spu.frag_test)(frag_mask, pixel_depth, pixel_stencil,
                             frag_depth, frag_alpha, facing);

   /* This code (everything after the call to frag_test) should
    * be generated on the PPU and uploaded to the SPU.
    */
   if (spu.read_depth || spu.read_stencil) {
      write_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
                    result.depth, result.stencil);
   }

   return result.mask;
}

View file

@ -0,0 +1,32 @@
/*
* (C) Copyright IBM Corporation 2008
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SPU_PER_FRAGMENT_OP
#define SPU_PER_FRAGMENT_OP
/**
 * Run the current per-fragment test function on one quad and update the
 * depth / stencil tile accordingly.
 *
 * \param x           X coordinate of the quad.
 * \param y           Y coordinate of the quad.
 * \param frag_mask   Mask of live fragments in the quad.
 * \param frag_depth  Fragment depth values as floats.
 * \param frag_alpha  Fragment alpha values.
 * \param facing      Facing value handed to the test function.
 * \return The fragment mask produced by the test function.
 */
extern qword
spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth,
qword frag_alpha, qword facing);
#endif /* SPU_PER_FRAGMENT_OP */

View file

@ -98,7 +98,7 @@ my_tile(uint tx, uint ty)
static INLINE void
get_cz_tiles(uint tx, uint ty)
{
if (spu.depth_stencil.depth.enabled) {
if (spu.read_depth) {
if (spu.cur_ztile_status != TILE_STATUS_CLEAR) {
//printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty);
get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1);
@ -153,7 +153,7 @@ static INLINE void
wait_put_cz_tiles(void)
{
wait_on_mask(1 << TAG_WRITE_TILE_COLOR);
if (spu.depth_stencil.depth.enabled) {
if (spu.read_depth) {
wait_on_mask(1 << TAG_WRITE_TILE_Z);
}
}

View file

@ -38,8 +38,7 @@
#include "spu_texture.h"
#include "spu_tile.h"
#include "spu_tri.h"
#include "spu_ztest.h"
#include "spu_per_fragment_op.h"
/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
@ -264,16 +263,12 @@ do_depth_test(int x, int y, mask_t quadmask)
zvals.v = eval_z((float) x, (float) y);
if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) {
int ix = (x - setup.cliprect_minx) / 4;
int iy = (y - setup.cliprect_miny) / 2;
mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask);
}
else {
int ix = (x - setup.cliprect_minx) / 2;
int iy = (y - setup.cliprect_miny) / 2;
mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask);
}
mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx,
y - setup.cliprect_miny,
(qword) quadmask,
(qword) zvals.v,
(qword) spu_splats((unsigned char) 0x0ffu),
(qword) spu_splats((unsigned int) 0x01u));
if (spu_extract(spu_orx(mask), 0))
spu.cur_ztile_status = TILE_STATUS_DIRTY;
@ -299,7 +294,7 @@ emit_quad( int x, int y, mask_t mask )
sp->quad.first->run(sp->quad.first, &setup.quad);
#else
if (spu.depth_stencil.depth.enabled) {
if (spu.read_depth) {
mask = do_depth_test(x, y, mask);
}
@ -434,7 +429,7 @@ static void flush_spans( void )
}
ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
if (spu.depth_stencil.depth.enabled) {
if (spu.read_depth) {
if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
/* wait for mfc_get() to complete */
//printf("SPU: %u: waiting for ztile\n", spu.init.id);

View file

@ -1,135 +0,0 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
* Zbuffer/depth test code.
*/
#ifndef SPU_ZTEST_H
#define SPU_ZTEST_H
#ifdef __SPU__
#include <spu_intrinsics.h>
#endif
/**
* Perform Z testing for a 16-bit/value Z buffer.
*
* \param zvals vector of four fragment zvalues as floats
* \param zbuf ptr to vector of ushort[8] zbuffer values. Note that this
* contains the Z values for 2 quads, 8 pixels.
* \param x x coordinate of quad (only lsbit is significant)
* \param inMask indicates which fragments in the quad are alive
* \return new mask indicating which fragments are alive after ztest
*/
static INLINE vector unsigned int
spu_z16_test_less(vector float zvals, vector unsigned short *zbuf,
uint x, vector unsigned int inMask)
{
/* Shuffle control byte of the form 10xxxxxx: yields 0x00 in the result */
#define ZERO 0x80
vector unsigned int zvals_ui4, zbuf_ui4, mask;
/* convert floats to uints in [0, 65535] */
zvals_ui4 = spu_convtu(zvals, 32); /* scale by 2^32 and convert to uint */
zvals_ui4 = spu_rlmask(zvals_ui4, -16); /* right shift 16 */
/* XXX this conditional could be removed with a bit of work */
if (x & 1) {
/* convert zbuffer values from ushorts to uints */
/* gather the second four ushorts (bytes 8..15 of *zbuf) */
zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf,
(vector unsigned int) *zbuf,
((vector unsigned char) {
ZERO, ZERO, 8, 9, ZERO, ZERO, 10, 11,
ZERO, ZERO, 12, 13, ZERO, ZERO, 14, 15}));
/* mask = (zbuf_ui4 > zvals_ui4) ? ~0 : 0, i.e. fragment Z is less */
mask = spu_cmpgt(zbuf_ui4, zvals_ui4);
/* mask &= inMask */
mask = spu_and(mask, inMask);
/* zbuf = mask ? zval : zbuf */
zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask);
/* convert zbuffer values from uints back to ushorts and store them in
 * bytes 8..15; bytes 0..7 of *zbuf (selectors 16..23) are preserved
 */
*zbuf = (vector unsigned short)
spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf,
((vector unsigned char) {
16, 17, 18, 19, 20, 21, 22, 23,
2, 3, 6, 7, 10, 11, 14, 15}));
}
else {
/* convert zbuffer values from ushorts to uints */
/* gather the first four ushorts (bytes 0..7 of *zbuf) */
zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf,
(vector unsigned int) *zbuf,
((vector unsigned char) {
ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3,
ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7}));
/* mask = (zbuf_ui4 > zvals_ui4) ? ~0 : 0, i.e. fragment Z is less */
mask = spu_cmpgt(zbuf_ui4, zvals_ui4);
/* mask &= inMask */
mask = spu_and(mask, inMask);
/* zbuf = mask ? zval : zbuf */
zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask);
/* convert zbuffer values from uints back to ushorts and store them in
 * bytes 0..7; bytes 8..15 of *zbuf (selectors 24..31) are preserved
 */
*zbuf = (vector unsigned short)
spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf,
((vector unsigned char) {
2, 3, 6, 7, 10, 11, 14, 15,
24, 25, 26, 27, 28, 29, 30, 31}));
}
return mask;
#undef ZERO
}
/**
 * Perform a "less than" Z test for a Z buffer holding 32-bit uint values.
 *
 * \param zvals     vector of four fragment Z values as floats
 * \param zbuf_ptr  ptr to the four Z buffer values of the quad; updated in
 *                  place for the fragments that pass
 * \param inMask    indicates which fragments in the quad are alive
 * \return new mask indicating which fragments are alive after the Z test
 */
static INLINE vector unsigned int
spu_z32_test_less(vector float zvals, vector unsigned int *zbuf_ptr,
                  vector unsigned int inMask)
{
   const vector unsigned int stored = *zbuf_ptr;

   /* fragment Z scaled by 2^32 and converted to uint */
   const vector unsigned int incoming = spu_convtu(zvals, 32);

   /* all ones where the stored Z is greater than the fragment's Z and the
    * fragment was alive on entry
    */
   const vector unsigned int passed =
      spu_and(spu_cmpgt(stored, incoming), inMask);

   /* passing fragments replace the stored Z value */
   *zbuf_ptr = spu_sel(stored, incoming, passed);

   return passed;
}
#endif /* SPU_ZTEST_H */