mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
Cell: implement Z16 and Z32 testing with SIMD instructions.
This commit is contained in:
parent
4f7dcb0e04
commit
7a0099b9f3
3 changed files with 163 additions and 197 deletions
|
|
@ -42,7 +42,8 @@
|
|||
typedef union {
|
||||
ushort t16[TILE_SIZE][TILE_SIZE];
|
||||
uint t32[TILE_SIZE][TILE_SIZE];
|
||||
float4 f4[TILE_SIZE/2][TILE_SIZE/2];
|
||||
vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4];
|
||||
vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2];
|
||||
} tile_t;
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -39,18 +39,11 @@
|
|||
#include "spu_tile.h"
|
||||
#include "spu_tri.h"
|
||||
|
||||
#include "spu_ztest.h"
|
||||
|
||||
/*
|
||||
* If SIMD_Z=1 the Z buffer is floating point and we use vector instructions
|
||||
* to do Z testing/updating.
|
||||
*/
|
||||
#define SIMD_Z 0
|
||||
|
||||
#if SIMD_Z
|
||||
/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
|
||||
typedef vector unsigned int mask_t;
|
||||
#else
|
||||
typedef uint mask_t;
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -282,20 +275,11 @@ pack_colors(uint uicolors[4], const float4 fcolors[4])
|
|||
}
|
||||
|
||||
|
||||
|
||||
static unsigned int
|
||||
do_depth_test(int x, int y, unsigned int mask)
|
||||
static INLINE mask_t
|
||||
do_depth_test(int x, int y, mask_t quadmask)
|
||||
{
|
||||
static const float4 zscale16
|
||||
= {.f={65535.0, 65535.0, 65535.0, 65535.0}};
|
||||
static const float4 zscale32
|
||||
= {.f={(float)0xffffffff,
|
||||
(float)0xffffffff,
|
||||
(float)0xffffffff,
|
||||
(float)0xffffffff}};
|
||||
int ix = x - setup.cliprect_minx;
|
||||
int iy = y - setup.cliprect_miny;
|
||||
float4 zvals;
|
||||
mask_t mask;
|
||||
|
||||
zvals.v = eval_z((float) x, (float) y);
|
||||
|
||||
|
|
@ -305,129 +289,20 @@ do_depth_test(int x, int y, unsigned int mask)
|
|||
cur_tile_status_z = TILE_STATUS_DIRTY;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (cur_tile_status_z == TILE_STATUS_CLEAR) {
|
||||
/* now, _really_ clear the tile */
|
||||
clear_z_tile(&ztile);
|
||||
}
|
||||
else if (cur_tile_status_z != TILE_STATUS_DIRTY) {
|
||||
/* make sure we've got the tile from main mem */
|
||||
wait_on_mask(1 << TAG_READ_TILE_Z);
|
||||
}
|
||||
cur_tile_status_z = TILE_STATUS_DIRTY;
|
||||
#endif
|
||||
|
||||
if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) {
|
||||
zvals.v = spu_mul(zvals.v, zscale16.v);
|
||||
if (mask & MASK_TOP_LEFT) {
|
||||
uint z = (uint) zvals.f[0];
|
||||
if (z < ztile.t16[iy][ix])
|
||||
ztile.t16[iy][ix] = z;
|
||||
else
|
||||
mask &= ~MASK_TOP_LEFT;
|
||||
}
|
||||
|
||||
if (mask & MASK_TOP_RIGHT) {
|
||||
uint z = (uint) zvals.f[1];
|
||||
if (z < ztile.t16[iy][ix+1])
|
||||
ztile.t16[iy][ix+1] = z;
|
||||
else
|
||||
mask &= ~MASK_TOP_RIGHT;
|
||||
}
|
||||
|
||||
if (mask & MASK_BOTTOM_LEFT) {
|
||||
uint z = (uint) zvals.f[2];
|
||||
if (z < ztile.t16[iy+1][ix])
|
||||
ztile.t16[iy+1][ix] = z;
|
||||
else
|
||||
mask &= ~MASK_BOTTOM_LEFT;
|
||||
}
|
||||
|
||||
if (mask & MASK_BOTTOM_RIGHT) {
|
||||
uint z = (uint) zvals.f[3];
|
||||
if (z < ztile.t16[iy+1][ix+1])
|
||||
ztile.t16[iy+1][ix+1] = z;
|
||||
else
|
||||
mask &= ~MASK_BOTTOM_RIGHT;
|
||||
}
|
||||
int ix = (x - setup.cliprect_minx) / 4;
|
||||
int iy = (y - setup.cliprect_miny) / 2;
|
||||
mask = spu_z16_test_less(zvals.v, &ztile.us8[iy][ix], x>>1, quadmask);
|
||||
}
|
||||
else {
|
||||
zvals.v = spu_mul(zvals.v, zscale32.v);
|
||||
ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM);
|
||||
if (mask & MASK_TOP_LEFT) {
|
||||
uint z = (uint) zvals.f[0];
|
||||
if (z < ztile.t32[iy][ix])
|
||||
ztile.t32[iy][ix] = z;
|
||||
else
|
||||
mask &= ~MASK_TOP_LEFT;
|
||||
}
|
||||
|
||||
if (mask & MASK_TOP_RIGHT) {
|
||||
uint z = (uint) zvals.f[1];
|
||||
if (z < ztile.t32[iy][ix+1])
|
||||
ztile.t32[iy][ix+1] = z;
|
||||
else
|
||||
mask &= ~MASK_TOP_RIGHT;
|
||||
}
|
||||
|
||||
if (mask & MASK_BOTTOM_LEFT) {
|
||||
uint z = (uint) zvals.f[2];
|
||||
if (z < ztile.t32[iy+1][ix])
|
||||
ztile.t32[iy+1][ix] = z;
|
||||
else
|
||||
mask &= ~MASK_BOTTOM_LEFT;
|
||||
}
|
||||
|
||||
if (mask & MASK_BOTTOM_RIGHT) {
|
||||
uint z = (uint) zvals.f[3];
|
||||
if (z < ztile.t32[iy+1][ix+1])
|
||||
ztile.t32[iy+1][ix+1] = z;
|
||||
else
|
||||
mask &= ~MASK_BOTTOM_RIGHT;
|
||||
}
|
||||
int ix = (x - setup.cliprect_minx) / 2;
|
||||
int iy = (y - setup.cliprect_miny) / 2;
|
||||
mask = spu_z32_test_less(zvals.v, &ztile.ui4[iy][ix], quadmask);
|
||||
}
|
||||
|
||||
if (mask)
|
||||
cur_tile_status_z = TILE_STATUS_DIRTY;
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
static vector unsigned int
|
||||
do_depth_test_simd(int x, int y, vector unsigned int quadmask)
|
||||
{
|
||||
int ix = (x - setup.cliprect_minx) / 2;
|
||||
int iy = (y - setup.cliprect_miny) / 2;
|
||||
float4 zvals;
|
||||
|
||||
vector unsigned int zmask;
|
||||
|
||||
zvals.v = eval_z((float) x, (float) y);
|
||||
|
||||
if (cur_tile_status_z == TILE_STATUS_CLEAR) {
|
||||
/* now, _really_ clear the tile */
|
||||
clear_z_tile(&ztile);
|
||||
}
|
||||
else if (cur_tile_status_z != TILE_STATUS_DIRTY) {
|
||||
/* make sure we've got the tile from main mem */
|
||||
wait_on_mask(1 << TAG_READ_TILE_Z);
|
||||
}
|
||||
cur_tile_status_z = TILE_STATUS_DIRTY;
|
||||
|
||||
/* XXX fetch Z value sooner to hide latency here */
|
||||
zmask = spu_cmpgt(ztile.f4[ix][iy].v, zvals.v);
|
||||
zmask = spu_and(zmask, quadmask);
|
||||
|
||||
ztile.f4[ix][iy].v = spu_sel(ztile.f4[ix][iy].v, zvals.v, zmask);
|
||||
//ztile.f4[ix][iy].v = spu_sel(zvals.v, ztile.f4[ix][iy].v, mask4);
|
||||
|
||||
return zmask;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit a quad (pass to next stage). No clipping is done.
|
||||
*/
|
||||
|
|
@ -461,36 +336,18 @@ emit_quad( int x, int y, mask_t mask )
|
|||
}
|
||||
|
||||
if (spu.depth_stencil.depth.enabled) {
|
||||
#if SIMD_Z
|
||||
mask = do_depth_test_simd(x, y, mask);
|
||||
#else
|
||||
mask = do_depth_test(x, y, mask);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if !SIMD_Z
|
||||
if (mask)
|
||||
#endif
|
||||
{
|
||||
if (cur_tile_status_c == TILE_STATUS_CLEAR) {
|
||||
/* now, _really_ clear the tile */
|
||||
clear_c_tile(&ctile);
|
||||
}
|
||||
/* If any bits in mask are set... */
|
||||
if (spu_extract(spu_orx(mask), 0)) {
|
||||
|
||||
#if 0
|
||||
if (cur_tile_status_c == TILE_STATUS_CLEAR) {
|
||||
/* now, _really_ clear the tile */
|
||||
clear_c_tile(&ctile);
|
||||
cur_tile_status_c = TILE_STATUS_DIRTY;
|
||||
}
|
||||
else if (cur_tile_status_c != TILE_STATUS_DIRTY) {
|
||||
/* make sure we've got the tile from main mem */
|
||||
wait_on_mask(1 << TAG_READ_TILE_COLOR);
|
||||
}
|
||||
#endif
|
||||
cur_tile_status_c = TILE_STATUS_DIRTY;
|
||||
|
||||
#if SIMD_Z
|
||||
if (spu_extract(mask, 0))
|
||||
ctile.t32[iy][ix] = colors[QUAD_TOP_LEFT];
|
||||
if (spu_extract(mask, 1))
|
||||
|
|
@ -499,20 +356,11 @@ emit_quad( int x, int y, mask_t mask )
|
|||
ctile.t32[iy+1][ix] = colors[QUAD_BOTTOM_LEFT];
|
||||
if (spu_extract(mask, 3))
|
||||
ctile.t32[iy+1][ix+1] = colors[QUAD_BOTTOM_RIGHT];
|
||||
#elif 0
|
||||
|
||||
#if 0
|
||||
/* SIMD_Z with swizzled color buffer (someday) */
|
||||
vector float icolors = *((vector float *) &colors);
|
||||
ctile.f4[iy/2][ix/2].v = spu_sel(ctile.f4[iy/2][ix/2].v, icolors, mask);
|
||||
|
||||
#else
|
||||
if (mask & MASK_TOP_LEFT)
|
||||
ctile.t32[iy][ix] = colors[QUAD_TOP_LEFT];
|
||||
if (mask & MASK_TOP_RIGHT)
|
||||
ctile.t32[iy][ix+1] = colors[QUAD_TOP_RIGHT];
|
||||
if (mask & MASK_BOTTOM_LEFT)
|
||||
ctile.t32[iy+1][ix] = colors[QUAD_BOTTOM_LEFT];
|
||||
if (mask & MASK_BOTTOM_RIGHT)
|
||||
ctile.t32[iy+1][ix+1] = colors[QUAD_BOTTOM_RIGHT];
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -533,38 +381,20 @@ static INLINE int block( int x )
|
|||
/**
|
||||
* Compute mask which indicates which pixels in the 2x2 quad are actually inside
|
||||
* the triangle's bounds.
|
||||
*
|
||||
* this is pretty nasty... may need to rework flush_spans again to
|
||||
* fix it, if possible.
|
||||
* The mask is a uint4 vector and each element will be 0 or 0xffffffff.
|
||||
*/
|
||||
static mask_t calculate_mask( int x )
|
||||
static INLINE mask_t calculate_mask( int x )
|
||||
{
|
||||
#if SIMD_Z
|
||||
uint m0, m1, m2, m3;
|
||||
|
||||
m0 = (x >= setup.span.left[0] && x < setup.span.right[0]) * ~0;
|
||||
m1 = (x+1 >= setup.span.left[0] && x+1 < setup.span.right[0]) * ~0;
|
||||
m2 = (x >= setup.span.left[1] && x < setup.span.right[1]) * ~0;
|
||||
m3 = (x+1 >= setup.span.left[1] && x+1 < setup.span.right[1]) * ~0;
|
||||
|
||||
return (vector unsigned int) {m0, m1, m2, m3};
|
||||
#else
|
||||
unsigned mask = 0x0;
|
||||
|
||||
if (x >= setup.span.left[0] && x < setup.span.right[0])
|
||||
mask |= MASK_TOP_LEFT;
|
||||
|
||||
if (x >= setup.span.left[1] && x < setup.span.right[1])
|
||||
mask |= MASK_BOTTOM_LEFT;
|
||||
|
||||
if (x+1 >= setup.span.left[0] && x+1 < setup.span.right[0])
|
||||
mask |= MASK_TOP_RIGHT;
|
||||
|
||||
if (x+1 >= setup.span.left[1] && x+1 < setup.span.right[1])
|
||||
mask |= MASK_BOTTOM_RIGHT;
|
||||
|
||||
/* This is a little tricky.
|
||||
* Use & instead of && to avoid branches.
|
||||
* Use negation to convert true/false to ~0/0 values.
|
||||
*/
|
||||
mask_t mask;
|
||||
mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0);
|
||||
mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1);
|
||||
mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2);
|
||||
mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3);
|
||||
return mask;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
135
src/mesa/pipe/cell/spu/spu_ztest.h
Normal file
135
src/mesa/pipe/cell/spu/spu_ztest.h
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* Zbuffer/depth test code.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef SPU_ZTEST_H
|
||||
#define SPU_ZTEST_H
|
||||
|
||||
|
||||
#ifdef __SPU__
|
||||
#include <spu_intrinsics.h>
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Perform Z testing for a 16-bit/value Z buffer.
|
||||
*
|
||||
* \param zvals vector of four fragment zvalues as floats
|
||||
* \param zbuf ptr to vector of ushort[8] zbuffer values. Note that this
|
||||
* contains the Z values for 2 quads, 8 pixels.
|
||||
* \param x x coordinate of quad (only lsbit is significant)
|
||||
* \param inMask indicates which fragments in the quad are alive
|
||||
* \return new mask indicating which fragments are alive after ztest
|
||||
*/
|
||||
static INLINE vector unsigned int
|
||||
spu_z16_test_less(vector float zvals, vector unsigned short *zbuf,
|
||||
uint x, vector unsigned int inMask)
|
||||
{
|
||||
#define ZERO 0x80
|
||||
vector unsigned int zvals_ui4, zbuf_ui4, mask;
|
||||
|
||||
/* convert floats to uints in [0, 65535] */
|
||||
zvals_ui4 = spu_convtu(zvals, 32); /* convert to [0, 2^32] */
|
||||
zvals_ui4 = spu_rlmask(zvals_ui4, -16); /* right shift 16 */
|
||||
|
||||
/* XXX this conditional could be removed with a bit of work */
|
||||
if (x & 1) {
|
||||
/* convert zbuffer values from ushorts to uints */
|
||||
/* gather lower four ushorts */
|
||||
zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf,
|
||||
(vector unsigned int) *zbuf,
|
||||
VEC_LITERAL(vector unsigned char,
|
||||
ZERO, ZERO, 8, 9, ZERO, ZERO, 10, 11,
|
||||
ZERO, ZERO, 12, 13, ZERO, ZERO, 14, 15));
|
||||
/* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */
|
||||
mask = spu_cmpgt(zbuf_ui4, zvals_ui4);
|
||||
/* mask &= inMask */
|
||||
mask = spu_and(mask, inMask);
|
||||
/* zbuf = mask ? zval : zbuf */
|
||||
zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask);
|
||||
/* convert zbuffer values from uints back to ushorts, preserve lower 4 */
|
||||
*zbuf = (vector unsigned short)
|
||||
spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf,
|
||||
VEC_LITERAL(vector unsigned char,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
2, 3, 6, 7, 10, 11, 14, 15));
|
||||
}
|
||||
else {
|
||||
/* convert zbuffer values from ushorts to uints */
|
||||
/* gather upper four ushorts */
|
||||
zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf,
|
||||
(vector unsigned int) *zbuf,
|
||||
VEC_LITERAL(vector unsigned char,
|
||||
ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3,
|
||||
ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7));
|
||||
/* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */
|
||||
mask = spu_cmpgt(zbuf_ui4, zvals_ui4);
|
||||
/* mask &= inMask */
|
||||
mask = spu_and(mask, inMask);
|
||||
/* zbuf = mask ? zval : zbuf */
|
||||
zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask);
|
||||
/* convert zbuffer values from uints back to ushorts, preserve upper 4 */
|
||||
*zbuf = (vector unsigned short)
|
||||
spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf,
|
||||
VEC_LITERAL(vector unsigned char,
|
||||
2, 3, 6, 7, 10, 11, 14, 15,
|
||||
24, 25, 26, 27, 28, 29, 30, 31));
|
||||
}
|
||||
return mask;
|
||||
#undef ZERO
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* As above, but Zbuffer values as 32-bit uints
|
||||
*/
|
||||
static INLINE vector unsigned int
|
||||
spu_z32_test_less(vector float zvals, vector unsigned int *zbuf_ptr,
|
||||
vector unsigned int inMask)
|
||||
{
|
||||
vector unsigned int zvals_ui4, mask, zbuf = *zbuf_ptr;
|
||||
|
||||
/* convert floats to uints in [0, 0xffffffff] */
|
||||
zvals_ui4 = spu_convtu(zvals, 32);
|
||||
/* mask = (zbuf < zvals_ui4) ? ~0 : 0 */
|
||||
mask = spu_cmpgt(zbuf, zvals_ui4);
|
||||
/* mask &= inMask */
|
||||
mask = spu_and(mask, inMask);
|
||||
/* zbuf = mask ? zval : zbuf */
|
||||
*zbuf_ptr = spu_sel(zbuf, zvals_ui4, mask);
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
|
||||
#endif /* SPU_ZTEST_H */
|
||||
Loading…
Add table
Reference in a new issue