mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 07:28:11 +02:00
cell: checkpoint commit of new per-fragment processing
Do code generation for alpha test, z test, stencil, blend, colormask and framebuffer/tile read/write as a single code block. Ian's previous blend/z/stencil test code is still there but mostly disabled and will be removed soon.
This commit is contained in:
parent
ee582fd3a7
commit
284ab5a612
14 changed files with 998 additions and 16 deletions
|
|
@ -97,6 +97,7 @@
|
|||
#define CELL_CMD_STATE_LOGICOP 21
|
||||
#define CELL_CMD_VS_EXECUTE 22
|
||||
#define CELL_CMD_FLUSH_BUFFER_RANGE 23
|
||||
#define CELL_CMD_STATE_FRAGMENT_OPS 24
|
||||
|
||||
|
||||
#define CELL_NUM_BUFFERS 4
|
||||
|
|
@ -112,30 +113,43 @@
|
|||
|
||||
/**
|
||||
*/
|
||||
struct cell_command_depth_stencil_alpha_test {
|
||||
struct cell_command_depth_stencil_alpha_test
|
||||
{
|
||||
uint64_t base; /**< Effective address of code start. */
|
||||
unsigned size; /**< Size in bytes of SPE code. */
|
||||
unsigned read_depth; /**< Flag: should depth be read? */
|
||||
unsigned read_stencil; /**< Flag: should stencil be read? */
|
||||
struct pipe_depth_stencil_alpha_state state;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Upload code to perform framebuffer blend operation
|
||||
*/
|
||||
struct cell_command_blend {
|
||||
struct cell_command_blend
|
||||
{
|
||||
uint64_t base; /**< Effective address of code start. */
|
||||
unsigned size; /**< Size in bytes of SPE code. */
|
||||
unsigned read_fb; /**< Flag: should framebuffer be read? */
|
||||
};
|
||||
|
||||
|
||||
struct cell_command_logicop {
|
||||
struct cell_command_logicop
|
||||
{
|
||||
uint64_t base; /**< Effective address of code start. */
|
||||
unsigned size; /**< Size in bytes of SPE code. */
|
||||
};
|
||||
|
||||
|
||||
/** Capacity, in instruction words, of the code array carried by a fragment-ops command. */
#define SPU_MAX_FRAGMENT_OPS_INSTS 64
|
||||
|
||||
/**
 * Batch-buffer command that carries generated per-fragment SPU code.
 * The PPU fills it in when emitting state and the SPU copies the code
 * array into its own buffer when it processes the command.
 */
struct cell_command_fragment_ops
|
||||
{
|
||||
uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */
|
||||
/* Generated instruction words; always SPU_MAX_FRAGMENT_OPS_INSTS entries
 * are transferred, whatever the number actually generated.
 */
unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS];
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Tell SPUs about the framebuffer size, location
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ SOURCES = \
|
|||
cell_context.c \
|
||||
cell_draw_arrays.c \
|
||||
cell_flush.c \
|
||||
cell_gen_fragment.c \
|
||||
cell_state_derived.c \
|
||||
cell_state_emit.c \
|
||||
cell_state_per_fragment.c \
|
||||
|
|
|
|||
530
src/gallium/drivers/cell/ppu/cell_gen_fragment.c
Normal file
530
src/gallium/drivers/cell/ppu/cell_gen_fragment.c
Normal file
|
|
@ -0,0 +1,530 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Generate SPU per-fragment code (actually per-quad code).
|
||||
* \author Brian Paul
|
||||
*/
|
||||
|
||||
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "rtasm/rtasm_ppc_spe.h"
|
||||
#include "cell_context.h"
|
||||
#include "cell_gen_fragment.h"
|
||||
|
||||
|
||||
|
||||
/** Do extra optimizations? */
|
||||
#define OPTIMIZATIONS 1
|
||||
|
||||
|
||||
/**
|
||||
* Generate SPE code to perform Z/depth testing.
|
||||
*
|
||||
* \param dsa Gallium depth/stencil/alpha state to gen code for
|
||||
* \param f SPE function to append instruction onto.
|
||||
* \param mask_reg register containing quad/pixel "alive" mask (in/out)
|
||||
* \param ifragZ_reg register containing integer fragment Z values (in)
|
||||
* \param ifbZ_reg register containing integer frame buffer Z values (in/out)
|
||||
* \param zmask_reg register containing result of Z test/comparison (out)
|
||||
*/
|
||||
static void
|
||||
gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa,
|
||||
struct spe_function *f,
|
||||
int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg)
|
||||
{
|
||||
ASSERT(dsa->depth.enabled);
|
||||
|
||||
switch (dsa->depth.func) {
|
||||
case PIPE_FUNC_EQUAL:
|
||||
/* zmask = (ifragZ == ref) */
|
||||
spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
|
||||
/* mask = (mask & zmask) */
|
||||
spe_and(f, mask_reg, mask_reg, zmask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_NOTEQUAL:
|
||||
/* zmask = (ifragZ == ref) */
|
||||
spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
|
||||
/* mask = (mask & ~zmask) */
|
||||
spe_andc(f, mask_reg, mask_reg, zmask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_GREATER:
|
||||
/* zmask = (ifragZ > ref) */
|
||||
spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
|
||||
/* mask = (mask & zmask) */
|
||||
spe_and(f, mask_reg, mask_reg, zmask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_LESS:
|
||||
/* zmask = (ref > ifragZ) */
|
||||
spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
|
||||
/* mask = (mask & zmask) */
|
||||
spe_and(f, mask_reg, mask_reg, zmask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_LEQUAL:
|
||||
/* zmask = (ifragZ > ref) */
|
||||
spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
|
||||
/* mask = (mask & ~zmask) */
|
||||
spe_andc(f, mask_reg, mask_reg, zmask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_GEQUAL:
|
||||
/* zmask = (ref > ifragZ) */
|
||||
spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
|
||||
/* mask = (mask & ~zmask) */
|
||||
spe_andc(f, mask_reg, mask_reg, zmask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_NEVER:
|
||||
spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */
|
||||
spe_move(f, zmask_reg, mask_reg); /* zmask = mask */
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_ALWAYS:
|
||||
/* mask unchanged */
|
||||
spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */
|
||||
break;
|
||||
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
|
||||
if (dsa->depth.writemask) {
|
||||
/*
|
||||
* If (ztest passed) {
|
||||
* framebufferZ = fragmentZ;
|
||||
* }
|
||||
* OR,
|
||||
* framebufferZ = (ztest_passed ? fragmentZ : framebufferZ;
|
||||
*/
|
||||
spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Generate SPE code to perform alpha testing.
|
||||
*
|
||||
* \param dsa Gallium depth/stencil/alpha state to gen code for
|
||||
* \param f SPE function to append instruction onto.
|
||||
* \param mask_reg register containing quad/pixel "alive" mask (in/out)
|
||||
* \param fragA_reg register containing four fragment alpha values (in)
|
||||
*/
|
||||
static void
|
||||
gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
|
||||
struct spe_function *f, int mask_reg, int fragA_reg)
|
||||
{
|
||||
int ref_reg = spe_allocate_available_register(f);
|
||||
int amask_reg = spe_allocate_available_register(f);
|
||||
|
||||
ASSERT(dsa->alpha.enabled);
|
||||
|
||||
if ((dsa->alpha.func != PIPE_FUNC_NEVER) &&
|
||||
(dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
|
||||
/* load/splat the alpha reference float value */
|
||||
spe_load_float(f, ref_reg, dsa->alpha.ref);
|
||||
}
|
||||
|
||||
/* emit code to do the alpha comparison, updating 'mask' */
|
||||
switch (dsa->alpha.func) {
|
||||
case PIPE_FUNC_EQUAL:
|
||||
/* amask = (fragA == ref) */
|
||||
spe_fceq(f, amask_reg, fragA_reg, ref_reg);
|
||||
/* mask = (mask & amask) */
|
||||
spe_and(f, mask_reg, mask_reg, amask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_NOTEQUAL:
|
||||
/* amask = (fragA == ref) */
|
||||
spe_fceq(f, amask_reg, fragA_reg, ref_reg);
|
||||
/* mask = (mask & ~amask) */
|
||||
spe_andc(f, mask_reg, mask_reg, amask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_GREATER:
|
||||
/* amask = (fragA > ref) */
|
||||
spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
|
||||
/* mask = (mask & amask) */
|
||||
spe_and(f, mask_reg, mask_reg, amask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_LESS:
|
||||
/* amask = (ref > fragA) */
|
||||
spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
|
||||
/* mask = (mask & amask) */
|
||||
spe_and(f, mask_reg, mask_reg, amask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_LEQUAL:
|
||||
/* amask = (fragA > ref) */
|
||||
spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
|
||||
/* mask = (mask & ~amask) */
|
||||
spe_andc(f, mask_reg, mask_reg, amask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_GEQUAL:
|
||||
/* amask = (ref > fragA) */
|
||||
spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
|
||||
/* mask = (mask & ~amask) */
|
||||
spe_andc(f, mask_reg, mask_reg, amask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_NEVER:
|
||||
spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_ALWAYS:
|
||||
/* no-op, mask unchanged */
|
||||
break;
|
||||
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
|
||||
#if OPTIMIZATIONS
|
||||
/* if mask == {0,0,0,0} we're all done, return */
|
||||
{
|
||||
/* re-use amask reg here */
|
||||
int tmp_reg = amask_reg;
|
||||
/* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */
|
||||
spe_orx(f, tmp_reg, mask_reg);
|
||||
/* if tmp[0] == 0 then return from function call */
|
||||
spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
spe_release_register(f, ref_reg);
|
||||
spe_release_register(f, amask_reg);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Generate SPE code to implement the fragment operations (alpha test,
|
||||
* depth test, stencil test, blending, colormask, and final
|
||||
* framebuffer write) as specified by the current context state.
|
||||
*
|
||||
* Logically, this code will be called after running the fragment
|
||||
* shader. But under some circumstances we could run some of this
|
||||
* code before the fragment shader to cull fragments/quads that are
|
||||
* totally occluded/discarded.
|
||||
*
|
||||
* XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now.
|
||||
*
|
||||
* See the spu_default_fragment_ops() function to see how the per-fragment
|
||||
* operations would be done with ordinary C code.
|
||||
* The code we generate here though has no branches, is SIMD, etc and
|
||||
* should be much faster.
|
||||
*
|
||||
* \param cell the rendering context (in)
|
||||
* \param f the generated function (out)
|
||||
*/
|
||||
void
|
||||
gen_fragment_function(struct cell_context *cell, struct spe_function *f)
|
||||
{
|
||||
const struct pipe_depth_stencil_alpha_state *dsa =
|
||||
&cell->depth_stencil->base;
|
||||
const struct pipe_blend_state *blend = &cell->blend->base;
|
||||
|
||||
/* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
|
||||
const int x_reg = 3; /* uint */
|
||||
const int y_reg = 4; /* uint */
|
||||
const int color_tile_reg = 5; /* tile_t * */
|
||||
const int depth_tile_reg = 6; /* tile_t * */
|
||||
const int fragZ_reg = 7; /* vector float */
|
||||
const int fragR_reg = 8; /* vector float */
|
||||
const int fragG_reg = 9; /* vector float */
|
||||
const int fragB_reg = 10; /* vector float */
|
||||
const int fragA_reg = 11; /* vector float */
|
||||
const int mask_reg = 12; /* vector uint */
|
||||
|
||||
/* offset of quad from start of tile
|
||||
* XXX assuming 4-byte pixels for color AND Z/stencil!!!!
|
||||
*/
|
||||
int quad_offset_reg;
|
||||
|
||||
int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */
|
||||
int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */
|
||||
|
||||
spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
|
||||
spe_allocate_register(f, x_reg);
|
||||
spe_allocate_register(f, y_reg);
|
||||
spe_allocate_register(f, color_tile_reg);
|
||||
spe_allocate_register(f, depth_tile_reg);
|
||||
spe_allocate_register(f, fragZ_reg);
|
||||
spe_allocate_register(f, fragR_reg);
|
||||
spe_allocate_register(f, fragG_reg);
|
||||
spe_allocate_register(f, fragB_reg);
|
||||
spe_allocate_register(f, fragA_reg);
|
||||
spe_allocate_register(f, mask_reg);
|
||||
|
||||
quad_offset_reg = spe_allocate_available_register(f);
|
||||
fbRGBA_reg = spe_allocate_available_register(f);
|
||||
fbZS_reg = spe_allocate_available_register(f);
|
||||
|
||||
/* compute offset of quad from start of tile, in bytes */
|
||||
{
|
||||
int x2_reg = spe_allocate_available_register(f);
|
||||
int y2_reg = spe_allocate_available_register(f);
|
||||
|
||||
ASSERT(TILE_SIZE == 32);
|
||||
|
||||
spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */
|
||||
spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */
|
||||
spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */
|
||||
spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */
|
||||
spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */
|
||||
|
||||
spe_release_register(f, x2_reg);
|
||||
spe_release_register(f, y2_reg);
|
||||
}
|
||||
|
||||
|
||||
if (dsa->alpha.enabled) {
|
||||
gen_alpha_test(dsa, f, mask_reg, fragA_reg);
|
||||
}
|
||||
|
||||
if (dsa->depth.enabled || dsa->stencil[0].enabled) {
|
||||
const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
|
||||
boolean write_depth_stencil;
|
||||
|
||||
int fbZ_reg = spe_allocate_available_register(f); /* Z values */
|
||||
int fbS_reg = spe_allocate_available_register(f); /* Stencil values */
|
||||
|
||||
/* fetch quad of depth/stencil values from tile at (x,y) */
|
||||
/* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
|
||||
spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
|
||||
|
||||
if (dsa->depth.enabled) {
|
||||
/* Extract Z bits from fbZS_reg into fbZ_reg */
|
||||
if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
|
||||
zs_format == PIPE_FORMAT_X8Z24_UNORM) {
|
||||
int mask_reg = spe_allocate_available_register(f);
|
||||
spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */
|
||||
spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */
|
||||
spe_release_register(f, mask_reg);
|
||||
/* OK, fbZ_reg has four 24-bit Z values now */
|
||||
}
|
||||
else {
|
||||
/* XXX handle other z/stencil formats */
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/* Convert fragZ values from float[4] to uint[4] */
|
||||
if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
|
||||
zs_format == PIPE_FORMAT_X8Z24_UNORM ||
|
||||
zs_format == PIPE_FORMAT_Z24S8_UNORM ||
|
||||
zs_format == PIPE_FORMAT_Z24X8_UNORM) {
|
||||
/* 24-bit Z values */
|
||||
int scale_reg = spe_allocate_available_register(f);
|
||||
|
||||
/* scale_reg[0,1,2,3] = float(2^24-1) */
|
||||
spe_load_float(f, scale_reg, (float) 0xffffff);
|
||||
|
||||
/* XXX these two instructions might be combined */
|
||||
spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */
|
||||
spe_cfltu(f, fragZ_reg, fragZ_reg, 0); /* fragZ = (int) fragZ */
|
||||
|
||||
spe_release_register(f, scale_reg);
|
||||
}
|
||||
else {
|
||||
/* XXX handle 16-bit Z format */
|
||||
ASSERT(0);
|
||||
}
|
||||
}
|
||||
|
||||
if (dsa->stencil[0].enabled) {
|
||||
/* Extract Stencil bit sfrom fbZS_reg into fbS_reg */
|
||||
if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
|
||||
zs_format == PIPE_FORMAT_X8Z24_UNORM) {
|
||||
/* XXX extract with a shift */
|
||||
ASSERT(0);
|
||||
}
|
||||
else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
|
||||
zs_format == PIPE_FORMAT_Z24X8_UNORM) {
|
||||
/* XXX extract with a mask */
|
||||
ASSERT(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (dsa->stencil[0].enabled) {
|
||||
/* XXX this may involve depth testing too */
|
||||
// gen_stencil_test(dsa, f, ... );
|
||||
ASSERT(0);
|
||||
}
|
||||
else if (dsa->depth.enabled) {
|
||||
int zmask_reg = spe_allocate_available_register(f);
|
||||
gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg);
|
||||
spe_release_register(f, zmask_reg);
|
||||
}
|
||||
|
||||
/* do we need to write Z and/or Stencil back into framebuffer? */
|
||||
write_depth_stencil = (dsa->depth.writemask |
|
||||
dsa->stencil[0].write_mask |
|
||||
dsa->stencil[1].write_mask);
|
||||
|
||||
if (write_depth_stencil) {
|
||||
/* Merge latest Z and Stencil values into fbZS_reg.
|
||||
* fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
|
||||
* fbS_reg has four 8-bit Z values in bits [7..0].
|
||||
*/
|
||||
if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
|
||||
zs_format == PIPE_FORMAT_X8Z24_UNORM) {
|
||||
spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
|
||||
spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
|
||||
}
|
||||
else if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
|
||||
zs_format == PIPE_FORMAT_X8Z24_UNORM) {
|
||||
/* XXX to do */
|
||||
ASSERT(0);
|
||||
}
|
||||
else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
|
||||
/* XXX to do */
|
||||
ASSERT(0);
|
||||
}
|
||||
else if (zs_format == PIPE_FORMAT_S8_UNORM) {
|
||||
/* XXX to do */
|
||||
ASSERT(0);
|
||||
}
|
||||
else {
|
||||
/* bad zs_format */
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
|
||||
spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
|
||||
}
|
||||
|
||||
spe_release_register(f, fbZ_reg);
|
||||
spe_release_register(f, fbS_reg);
|
||||
}
|
||||
|
||||
|
||||
/* Get framebuffer quad/colors. We'll need these for blending,
|
||||
* color masking, and to obey the quad/pixel mask.
|
||||
* Load: fbRGBA_reg = memory[color_tile + quad_offset]
|
||||
* Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
|
||||
* we could skip this load.
|
||||
*/
|
||||
spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg);
|
||||
|
||||
|
||||
if (blend->blend_enable) {
|
||||
/* convert packed tile colors in fbRGBA_reg to float[4] vectors */
|
||||
|
||||
// gen_blend_code(blend, f, mask_reg, ... );
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Write fragment colors to framebuffer/tile.
|
||||
* This involves converting the fragment colors from float[4] to the
|
||||
* tile's specific format and obeying the quad/pixel mask.
|
||||
*/
|
||||
{
|
||||
const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;
|
||||
int rgba_reg = spe_allocate_available_register(f);
|
||||
|
||||
/* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */
|
||||
spe_cfltu(f, fragR_reg, fragR_reg, 32);
|
||||
spe_cfltu(f, fragG_reg, fragG_reg, 32);
|
||||
spe_cfltu(f, fragB_reg, fragB_reg, 32);
|
||||
spe_cfltu(f, fragA_reg, fragA_reg, 32);
|
||||
|
||||
/* Shift most the significant bytes to least the significant positions.
|
||||
* I.e.: reg = reg >> 24
|
||||
*/
|
||||
spe_rotmi(f, fragR_reg, fragR_reg, -24);
|
||||
spe_rotmi(f, fragG_reg, fragG_reg, -24);
|
||||
spe_rotmi(f, fragB_reg, fragB_reg, -24);
|
||||
spe_rotmi(f, fragA_reg, fragA_reg, -24);
|
||||
|
||||
/* Shift the color bytes according to the surface format */
|
||||
if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) {
|
||||
spe_roti(f, fragG_reg, fragG_reg, 8); /* green <<= 8 */
|
||||
spe_roti(f, fragR_reg, fragR_reg, 16); /* red <<= 16 */
|
||||
spe_roti(f, fragA_reg, fragA_reg, 24); /* alpha <<= 24 */
|
||||
}
|
||||
else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) {
|
||||
spe_roti(f, fragR_reg, fragR_reg, 8); /* red <<= 8 */
|
||||
spe_roti(f, fragG_reg, fragG_reg, 16); /* green <<= 16 */
|
||||
spe_roti(f, fragB_reg, fragB_reg, 24); /* blue <<= 24 */
|
||||
}
|
||||
else {
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/* Merge red, green, blue, alpha registers to make packed RGBA colors.
|
||||
* Eg: after shifting according to color_format we might have:
|
||||
* R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000}
|
||||
* G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600}
|
||||
* B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099}
|
||||
* A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000}
|
||||
* OR-ing all those together gives us four packed colors:
|
||||
* RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
|
||||
*/
|
||||
spe_or(f, rgba_reg, fragR_reg, fragG_reg);
|
||||
spe_or(f, rgba_reg, rgba_reg, fragB_reg);
|
||||
spe_or(f, rgba_reg, rgba_reg, fragA_reg);
|
||||
|
||||
/* Mix fragment colors with framebuffer colors using the quad/pixel mask:
|
||||
* if (mask[i])
|
||||
* rgba[i] = rgba[i];
|
||||
* else
|
||||
* rgba[i] = framebuffer[i];
|
||||
*/
|
||||
spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg);
|
||||
|
||||
/* Store updated quad in tile:
|
||||
* memory[color_tile + quad_offset] = rgba_reg;
|
||||
*/
|
||||
spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg);
|
||||
|
||||
spe_release_register(f, rgba_reg);
|
||||
}
|
||||
|
||||
printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
|
||||
|
||||
spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */
|
||||
|
||||
|
||||
spe_release_register(f, fbRGBA_reg);
|
||||
spe_release_register(f, fbZS_reg);
|
||||
spe_release_register(f, quad_offset_reg);
|
||||
}
|
||||
|
||||
38
src/gallium/drivers/cell/ppu/cell_gen_fragment.h
Normal file
38
src/gallium/drivers/cell/ppu/cell_gen_fragment.h
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
#ifndef CELL_GEN_FRAGMENT_H
|
||||
#define CELL_GEN_FRAGMENT_H
|
||||
|
||||
|
||||
/* Forward declarations so this header can be included on its own;
 * only pointers to these types are used here.
 */
struct cell_context;
struct spe_function;


/**
 * Generate the SPE per-fragment (actually per-quad) operations function
 * for the context's current state.
 *
 * \param cell  the rendering context (in)
 * \param f     the generated function (out)
 */
extern void
gen_fragment_function(struct cell_context *cell, struct spe_function *f);
|
||||
|
||||
|
||||
#endif /* CELL_GEN_FRAGMENT_H */
|
||||
|
||||
|
|
@ -27,6 +27,7 @@
|
|||
|
||||
#include "util/u_memory.h"
|
||||
#include "cell_context.h"
|
||||
#include "cell_gen_fragment.h"
|
||||
#include "cell_state.h"
|
||||
#include "cell_state_emit.h"
|
||||
#include "cell_state_per_fragment.h"
|
||||
|
|
@ -83,6 +84,29 @@ cell_emit_state(struct cell_context *cell)
|
|||
fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE;
|
||||
fb->width = cell->framebuffer.width;
|
||||
fb->height = cell->framebuffer.height;
|
||||
#if 0
|
||||
printf("EMIT color format %s\n", pf_name(fb->color_format));
|
||||
printf("EMIT depth format %s\n", pf_name(fb->depth_format));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_DEPTH_STENCIL)) {
|
||||
/* XXX we don't want to always do codegen here. We should have
|
||||
* a hash/lookup table to cache previous results...
|
||||
*/
|
||||
struct cell_command_fragment_ops *fops
|
||||
= cell_batch_alloc(cell, sizeof(*fops));
|
||||
struct spe_function spe_code;
|
||||
|
||||
/* generate new code */
|
||||
gen_fragment_function(cell, &spe_code);
|
||||
/* put the new code into the batch buffer */
|
||||
fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;
|
||||
memcpy(&fops->code, spe_code.store,
|
||||
SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
|
||||
/* free codegen buffer */
|
||||
spe_release_func(&spe_code);
|
||||
}
|
||||
|
||||
if (cell->dirty & CELL_NEW_BLEND) {
|
||||
|
|
@ -90,8 +114,7 @@ cell_emit_state(struct cell_context *cell)
|
|||
|
||||
if (cell->blend != NULL) {
|
||||
blend.base = (intptr_t) cell->blend->code.store;
|
||||
blend.size = (char *) cell->blend->code.csr
|
||||
- (char *) cell->blend->code.store;
|
||||
blend.size = cell->blend->code.num_inst * SPE_INST_SIZE;
|
||||
blend.read_fb = TRUE;
|
||||
}
|
||||
else {
|
||||
|
|
@ -108,10 +131,10 @@ cell_emit_state(struct cell_context *cell)
|
|||
|
||||
if (cell->depth_stencil != NULL) {
|
||||
dsat.base = (intptr_t) cell->depth_stencil->code.store;
|
||||
dsat.size = (char *) cell->depth_stencil->code.csr
|
||||
- (char *) cell->depth_stencil->code.store;
|
||||
dsat.size = cell->depth_stencil->code.num_inst * SPE_INST_SIZE;
|
||||
dsat.read_depth = TRUE;
|
||||
dsat.read_stencil = FALSE;
|
||||
dsat.state = cell->depth_stencil->base;
|
||||
}
|
||||
else {
|
||||
dsat.base = 0;
|
||||
|
|
|
|||
|
|
@ -1158,7 +1158,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb)
|
|||
static int
|
||||
PC_OFFSET(const struct spe_function *f, const void *d)
|
||||
{
|
||||
const intptr_t pc = (intptr_t) f->csr;
|
||||
const intptr_t pc = (intptr_t) &f->store[f->num_inst];
|
||||
const intptr_t ea = ~0x0f & (intptr_t) d;
|
||||
|
||||
return (ea - pc) >> 2;
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ INCLUDE_DIRS = \
|
|||
$(SPU_CC) $(SPU_CFLAGS) -c $<
|
||||
|
||||
.c.s:
|
||||
$(SPU_CC) $(SPU_CFLAGS) -S $<
|
||||
$(SPU_CC) $(SPU_CFLAGS) -O3 -S $<
|
||||
|
||||
|
||||
# The .a file will be linked into the main/PPU executable
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@
|
|||
|
||||
#include "spu_main.h"
|
||||
#include "spu_render.h"
|
||||
#include "spu_per_fragment_op.h"
|
||||
#include "spu_texture.h"
|
||||
#include "spu_tile.h"
|
||||
//#include "spu_test.h"
|
||||
|
|
@ -46,7 +47,7 @@
|
|||
/*
|
||||
helpful headers:
|
||||
/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h
|
||||
/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h
|
||||
/opt/cell/sdk/usr/include/libmisc.h
|
||||
*/
|
||||
|
||||
boolean Debug = FALSE;
|
||||
|
|
@ -226,6 +227,24 @@ cmd_release_verts(const struct cell_command_release_verts *release)
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Process a CELL_CMD_STATE_FRAGMENT_OPS command.
|
||||
* This involves installing new fragment ops SPU code.
|
||||
* If this function is never called, we'll use a regular C fallback function
|
||||
* for fragment processing.
|
||||
*/
|
||||
static void
|
||||
cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
|
||||
{
|
||||
if (Debug)
|
||||
printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id);
|
||||
/* Copy SPU code from batch buffer to spu buffer */
|
||||
memcpy(spu.fragment_ops.code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
|
||||
/* Point function pointer at new code */
|
||||
spu.fragment_ops.func = (spu_fragment_ops_func) spu.fragment_ops.code;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
|
||||
{
|
||||
|
|
@ -257,6 +276,8 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
|
|||
break;
|
||||
case PIPE_FORMAT_Z24S8_UNORM:
|
||||
case PIPE_FORMAT_S8Z24_UNORM:
|
||||
case PIPE_FORMAT_Z24X8_UNORM:
|
||||
case PIPE_FORMAT_X8Z24_UNORM:
|
||||
spu.fb.zsize = 4;
|
||||
spu.fb.zscale = (float) 0x00ffffffu;
|
||||
break;
|
||||
|
|
@ -282,6 +303,8 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
|
|||
}
|
||||
|
||||
|
||||
#define NEW_FRAGMENT_FUNCTION 01
|
||||
|
||||
static void
|
||||
cmd_state_blend(const struct cell_command_blend *state)
|
||||
{
|
||||
|
|
@ -302,7 +325,9 @@ cmd_state_blend(const struct cell_command_blend *state)
|
|||
wait_on_mask(1 << TAG_BATCH_BUFFER);
|
||||
spu.blend = (blend_func) fb_blend_code_buffer;
|
||||
spu.read_fb = state->read_fb;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
spu.read_fb = FALSE;
|
||||
}
|
||||
}
|
||||
|
|
@ -326,7 +351,9 @@ cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *stat
|
|||
0, /* tid */
|
||||
0 /* rid */);
|
||||
wait_on_mask(1 << TAG_BATCH_BUFFER);
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
/* If there is no code, emit a return instruction.
|
||||
*/
|
||||
depth_stencil_code_buffer[0] = 0x35;
|
||||
|
|
@ -338,12 +365,14 @@ cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *stat
|
|||
spu.frag_test = (frag_test_func) depth_stencil_code_buffer;
|
||||
spu.read_depth = state->read_depth;
|
||||
spu.read_stencil = state->read_stencil;
|
||||
spu.depth_stencil_alpha = state->state;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
cmd_state_logicop(const struct cell_command_logicop * code)
|
||||
{
|
||||
#if !NEW_FRAGMENT_FUNCTION
|
||||
mfc_get(logicop_code_buffer,
|
||||
(unsigned int) code->base, /* src */
|
||||
code->size,
|
||||
|
|
@ -353,6 +382,7 @@ cmd_state_logicop(const struct cell_command_logicop * code)
|
|||
wait_on_mask(1 << TAG_BATCH_BUFFER);
|
||||
|
||||
spu.logicop = (logicop_func) logicop_code_buffer;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -455,7 +485,9 @@ cmd_finish(void)
|
|||
|
||||
|
||||
/**
|
||||
* Execute a batch of commands
|
||||
* Execute a batch of commands which was sent to us by the PPU.
|
||||
* See the cell_emit_state.c code to see where the commands come from.
|
||||
*
|
||||
* The opcode param encodes the location of the buffer and its size.
|
||||
*/
|
||||
static void
|
||||
|
|
@ -519,6 +551,14 @@ cmd_batch(uint opcode)
|
|||
pos += pos_incr;
|
||||
}
|
||||
break;
|
||||
case CELL_CMD_STATE_FRAGMENT_OPS:
|
||||
{
|
||||
struct cell_command_fragment_ops *fops
|
||||
= (struct cell_command_fragment_ops *) &buffer[pos];
|
||||
cmd_state_fragment_ops(fops);
|
||||
pos += sizeof(*fops) / 8;
|
||||
}
|
||||
break;
|
||||
case CELL_CMD_RELEASE_VERTS:
|
||||
{
|
||||
struct cell_command_release_verts *release
|
||||
|
|
@ -680,6 +720,11 @@ one_time_init(void)
|
|||
memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status));
|
||||
memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status));
|
||||
invalidate_tex_cache();
|
||||
|
||||
/* Install default/fallback fragment processing function.
|
||||
* This will normally be overridden by a code-gen'd function.
|
||||
*/
|
||||
spu.fragment_ops.func = spu_fallback_fragment_ops;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -91,6 +91,24 @@ typedef struct spu_blend_results (*logicop_func)(
|
|||
|
||||
typedef vector float (*sample_texture_func)(uint unit, vector float texcoord);
|
||||
|
||||
|
||||
/**
 * Signature of the per-quad fragment operations routine.
 * spu.fragment_ops.func has this type; it points either at the C
 * fallback or at generated code copied into spu.fragment_ops.code.
 * The parameter order matters: the PPU-side code generator expects the
 * first parameter in register $3, the second in $4, and so on.
 */
typedef void (*spu_fragment_ops_func)(uint x, uint y,
|
||||
tile_t *colorTile,
|
||||
tile_t *depthStencilTile,
|
||||
vector float fragZ,
|
||||
vector float fragRed,
|
||||
vector float fragGreen,
|
||||
vector float fragBlue,
|
||||
vector float fragAlpha,
|
||||
vector unsigned int mask);
|
||||
|
||||
struct spu_fragment_ops
|
||||
{
|
||||
uint code[SPU_MAX_FRAGMENT_OPS_INSTS];
|
||||
spu_fragment_ops_func func; /**< Current fragment ops function */
|
||||
} ALIGN16_ATTRIB;
|
||||
|
||||
|
||||
struct spu_framebuffer {
|
||||
void *color_start; /**< addr of color surface in main memory */
|
||||
void *depth_start; /**< addr of depth surface in main memory */
|
||||
|
|
@ -127,6 +145,9 @@ struct spu_global
|
|||
struct cell_init_info init;
|
||||
|
||||
struct spu_framebuffer fb;
|
||||
|
||||
struct pipe_depth_stencil_alpha_state depth_stencil_alpha;
|
||||
|
||||
boolean read_depth;
|
||||
boolean read_stencil;
|
||||
frag_test_func frag_test; /**< Current depth/stencil test code */
|
||||
|
|
@ -142,6 +163,8 @@ struct spu_global
|
|||
|
||||
struct vertex_info vertex_info;
|
||||
|
||||
struct spu_fragment_ops fragment_ops;
|
||||
|
||||
/* XXX more state to come */
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -29,8 +29,11 @@
|
|||
* \author Ian Romanick <idr@us.ibm.com>
|
||||
*/
|
||||
|
||||
|
||||
#include <transpose_matrix4x4.h>
|
||||
#include "pipe/p_format.h"
|
||||
#include "spu_main.h"
|
||||
#include "spu_colorpack.h"
|
||||
#include "spu_per_fragment_op.h"
|
||||
|
||||
#define ZERO 0x80
|
||||
|
|
@ -90,7 +93,8 @@ read_ds_quad(tile_t *tile, unsigned x, unsigned y,
|
|||
break;
|
||||
}
|
||||
|
||||
case PIPE_FORMAT_S8Z24_UNORM: {
|
||||
case PIPE_FORMAT_S8Z24_UNORM:
|
||||
case PIPE_FORMAT_X8Z24_UNORM: {
|
||||
qword *ptr = (qword *) &tile->ui4[iy][ix];
|
||||
|
||||
*depth = si_and(*ptr, si_fsmbi(0x7777));
|
||||
|
|
@ -153,7 +157,8 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
|
|||
break;
|
||||
}
|
||||
|
||||
case PIPE_FORMAT_S8Z24_UNORM: {
|
||||
case PIPE_FORMAT_S8Z24_UNORM:
|
||||
case PIPE_FORMAT_X8Z24_UNORM: {
|
||||
qword *ptr = (qword *) &buffer->ui4[iy][ix];
|
||||
/* form select mask = 0111,0111,0111,0111 */
|
||||
qword mask = si_fsmbi(0x7777);
|
||||
|
|
@ -217,3 +222,225 @@ spu_do_depth_stencil(int x, int y,
|
|||
|
||||
return result.mask;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Called by rasterizer for each quad after the shader has run. This
|
||||
* is a fallback/debug function. In reality we'll use a generated
|
||||
* function produced by the PPU. But this function is useful for
|
||||
* debug/validation.
|
||||
*/
|
||||
void
|
||||
spu_fallback_fragment_ops(uint x, uint y,
|
||||
tile_t *colorTile,
|
||||
tile_t *depthStencilTile,
|
||||
vector float fragZ,
|
||||
vector float fragRed,
|
||||
vector float fragGreen,
|
||||
vector float fragBlue,
|
||||
vector float fragAlpha,
|
||||
vector unsigned int mask)
|
||||
{
|
||||
vector float frag_soa[4], frag_aos[4];
|
||||
unsigned int c0, c1, c2, c3;
|
||||
|
||||
/* do alpha test */
|
||||
if (spu.depth_stencil_alpha.alpha.enabled) {
|
||||
vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref);
|
||||
vector unsigned int amask;
|
||||
|
||||
switch (spu.depth_stencil_alpha.alpha.func) {
|
||||
case PIPE_FUNC_LESS:
|
||||
amask = spu_cmpgt(ref, fragAlpha); /* mask = (fragAlpha < ref) */
|
||||
break;
|
||||
case PIPE_FUNC_GREATER:
|
||||
amask = spu_cmpgt(fragAlpha, ref); /* mask = (fragAlpha > ref) */
|
||||
break;
|
||||
case PIPE_FUNC_GEQUAL:
|
||||
amask = spu_cmpgt(ref, fragAlpha);
|
||||
amask = spu_nor(amask, amask);
|
||||
break;
|
||||
case PIPE_FUNC_LEQUAL:
|
||||
amask = spu_cmpgt(fragAlpha, ref);
|
||||
amask = spu_nor(amask, amask);
|
||||
break;
|
||||
case PIPE_FUNC_EQUAL:
|
||||
amask = spu_cmpeq(ref, fragAlpha);
|
||||
break;
|
||||
case PIPE_FUNC_NOTEQUAL:
|
||||
amask = spu_cmpeq(ref, fragAlpha);
|
||||
amask = spu_nor(amask, amask);
|
||||
break;
|
||||
case PIPE_FUNC_ALWAYS:
|
||||
amask = spu_splats(0xffffffffU);
|
||||
break;
|
||||
case PIPE_FUNC_NEVER:
|
||||
amask = spu_splats( 0x0U);
|
||||
break;
|
||||
default:
|
||||
;
|
||||
}
|
||||
|
||||
mask = spu_and(mask, amask);
|
||||
}
|
||||
|
||||
/* Z and/or stencil testing... */
|
||||
if (spu.depth_stencil_alpha.depth.enabled ||
|
||||
spu.depth_stencil_alpha.stencil[0].enabled) {
|
||||
|
||||
/* get four Z/Stencil values from tile */
|
||||
vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU);
|
||||
vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2];
|
||||
vector unsigned int ifbZ = spu_and(ifbZS, mask24);
|
||||
vector unsigned int ifbS = spu_andc(ifbZS, mask24);
|
||||
|
||||
if (spu.depth_stencil_alpha.stencil[0].enabled) {
|
||||
/* do stencil test */
|
||||
ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM);
|
||||
|
||||
}
|
||||
else if (spu.depth_stencil_alpha.depth.enabled) {
|
||||
/* do depth test */
|
||||
|
||||
ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM ||
|
||||
spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM);
|
||||
|
||||
vector unsigned int ifragZ;
|
||||
vector unsigned int zmask;
|
||||
|
||||
/* convert four fragZ from float to uint */
|
||||
fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff));
|
||||
ifragZ = spu_convtu(fragZ, 0);
|
||||
|
||||
/* do depth comparison, setting zmask with results */
|
||||
switch (spu.depth_stencil_alpha.depth.func) {
|
||||
case PIPE_FUNC_LESS:
|
||||
zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */
|
||||
break;
|
||||
case PIPE_FUNC_GREATER:
|
||||
zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */
|
||||
break;
|
||||
case PIPE_FUNC_GEQUAL:
|
||||
zmask = spu_cmpgt(ifbZ, ifragZ);
|
||||
zmask = spu_nor(zmask, zmask);
|
||||
break;
|
||||
case PIPE_FUNC_LEQUAL:
|
||||
zmask = spu_cmpgt(ifragZ, ifbZ);
|
||||
zmask = spu_nor(zmask, zmask);
|
||||
break;
|
||||
case PIPE_FUNC_EQUAL:
|
||||
zmask = spu_cmpeq(ifbZ, ifragZ);
|
||||
break;
|
||||
case PIPE_FUNC_NOTEQUAL:
|
||||
zmask = spu_cmpeq(ifbZ, ifragZ);
|
||||
zmask = spu_nor(zmask, zmask);
|
||||
break;
|
||||
case PIPE_FUNC_ALWAYS:
|
||||
zmask = spu_splats(0xffffffffU);
|
||||
break;
|
||||
case PIPE_FUNC_NEVER:
|
||||
zmask = spu_splats( 0x0U);
|
||||
break;
|
||||
default:
|
||||
;
|
||||
}
|
||||
|
||||
mask = spu_and(mask, zmask);
|
||||
|
||||
/* merge framebuffer Z and fragment Z according to the mask */
|
||||
ifbZ = spu_or(spu_and(ifragZ, mask),
|
||||
spu_andc(ifbZ, mask));
|
||||
}
|
||||
|
||||
if (spu_extract(spu_orx(mask), 0)) {
|
||||
/* put new fragment Z/Stencil values back into Z/Stencil tile */
|
||||
depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS);
|
||||
|
||||
spu.cur_ztile_status = TILE_STATUS_DIRTY;
|
||||
}
|
||||
}
|
||||
|
||||
/* XXX do blending here */
|
||||
|
||||
/* XXX do colormask test here */
|
||||
|
||||
|
||||
if (spu_extract(spu_orx(mask), 0)) {
|
||||
spu.cur_ctile_status = TILE_STATUS_DIRTY;
|
||||
}
|
||||
else {
|
||||
return;
|
||||
}
|
||||
|
||||
/* convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA */
|
||||
#if 0
|
||||
{
|
||||
vector float frag_soa[4];
|
||||
frag_soa[0] = fragRed;
|
||||
frag_soa[1] = fragGreen;
|
||||
frag_soa[2] = fragBlue;
|
||||
frag_soa[3] = fragAlpha;
|
||||
_transpose_matrix4x4(frag_aos, frag_soa);
|
||||
}
|
||||
#else
|
||||
/* short-cut relying on function parameter layout: */
|
||||
_transpose_matrix4x4(frag_aos, &fragRed);
|
||||
(void) fragGreen;
|
||||
(void) fragBlue;
|
||||
#endif
|
||||
|
||||
switch (spu.fb.color_format) {
|
||||
case PIPE_FORMAT_A8R8G8B8_UNORM:
|
||||
c0 = spu_pack_A8R8G8B8(frag_aos[0]);
|
||||
c1 = spu_pack_A8R8G8B8(frag_aos[1]);
|
||||
c2 = spu_pack_A8R8G8B8(frag_aos[2]);
|
||||
c3 = spu_pack_A8R8G8B8(frag_aos[3]);
|
||||
break;
|
||||
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
c0 = spu_pack_B8G8R8A8(frag_aos[0]);
|
||||
c1 = spu_pack_B8G8R8A8(frag_aos[1]);
|
||||
c2 = spu_pack_B8G8R8A8(frag_aos[2]);
|
||||
c3 = spu_pack_B8G8R8A8(frag_aos[3]);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* Quad layout:
|
||||
* +--+--+
|
||||
* |p0|p1|
|
||||
* +--+--+
|
||||
* |p2|p3|
|
||||
* +--+--+
|
||||
*/
|
||||
if (spu_extract(mask, 0))
|
||||
colorTile->ui[y+0][x+0] = c0;
|
||||
if (spu_extract(mask, 1))
|
||||
colorTile->ui[y+0][x+1] = c1;
|
||||
if (spu_extract(mask, 2))
|
||||
colorTile->ui[y+1][x+0] = c2;
|
||||
if (spu_extract(mask, 3))
|
||||
colorTile->ui[y+1][x+1] = c3;
|
||||
#else
|
||||
/*
|
||||
* Quad layout:
|
||||
* +--+--+--+--+
|
||||
* |p0|p1|p2|p3|
|
||||
* +--+--+--+--+
|
||||
*/
|
||||
if (spu_extract(mask, 0))
|
||||
colorTile->ui[y][x*2] = c0;
|
||||
if (spu_extract(mask, 1))
|
||||
colorTile->ui[y][x*2+1] = c1;
|
||||
if (spu_extract(mask, 2))
|
||||
colorTile->ui[y][x*2+2] = c2;
|
||||
if (spu_extract(mask, 3))
|
||||
colorTile->ui[y][x*2+3] = c3;
|
||||
#endif
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,4 +29,15 @@ extern qword
|
|||
spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth,
|
||||
qword frag_alpha, qword facing);
|
||||
|
||||
extern void
|
||||
spu_fallback_fragment_ops(uint x, uint y,
|
||||
tile_t *colorTile,
|
||||
tile_t *depthStencilTile,
|
||||
vector float fragZ,
|
||||
vector float fragRed,
|
||||
vector float fragGreen,
|
||||
vector float fragBlue,
|
||||
vector float fragAlpha,
|
||||
vector unsigned int mask);
|
||||
|
||||
#endif /* SPU_PER_FRAGMENT_OP */
|
||||
|
|
|
|||
|
|
@ -297,9 +297,12 @@ emit_quad( int x, int y, mask_t mask )
|
|||
sp->quad.first->run(sp->quad.first, &setup.quad);
|
||||
#else
|
||||
|
||||
#define NEW_FRAGMENT_FUNCTION 01
|
||||
#if !NEW_FRAGMENT_FUNCTION
|
||||
if (spu.read_depth) {
|
||||
mask = do_depth_test(x, y, mask);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* If any bits in mask are set... */
|
||||
if (spu_extract(spu_orx(mask), 0)) {
|
||||
|
|
@ -308,6 +311,7 @@ emit_quad( int x, int y, mask_t mask )
|
|||
vector float colors[4];
|
||||
|
||||
spu.cur_ctile_status = TILE_STATUS_DIRTY;
|
||||
spu.cur_ztile_status = TILE_STATUS_DIRTY;
|
||||
|
||||
if (spu.texture[0].start) {
|
||||
/* texture mapping */
|
||||
|
|
@ -355,6 +359,29 @@ emit_quad( int x, int y, mask_t mask )
|
|||
}
|
||||
|
||||
|
||||
#if NEW_FRAGMENT_FUNCTION
|
||||
{
|
||||
/* Convert fragment data from AoS to SoA format.
|
||||
* I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA)
|
||||
* This is temporary!
|
||||
*/
|
||||
vector float soa_frag[4];
|
||||
_transpose_matrix4x4(soa_frag, colors);
|
||||
|
||||
float4 fragZ;
|
||||
|
||||
fragZ.v = eval_z((float) x, (float) y);
|
||||
|
||||
/* Do all per-fragment/quad operations here, including:
|
||||
* alpha test, z test, stencil test, blend and framebuffer writing.
|
||||
*/
|
||||
spu.fragment_ops.func(ix, iy, &spu.ctile, &spu.ztile,
|
||||
fragZ.v,
|
||||
soa_frag[0], soa_frag[1],
|
||||
soa_frag[2], soa_frag[3],
|
||||
mask);
|
||||
}
|
||||
#else
|
||||
/* Convert fragment data from AoS to SoA format.
|
||||
* I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA)
|
||||
*/
|
||||
|
|
@ -405,6 +432,9 @@ emit_quad( int x, int y, mask_t mask )
|
|||
spu.ctile.ui[iy+0][ix+1] = spu_extract((vec_uint4) result.g, 0);
|
||||
spu.ctile.ui[iy+1][ix+0] = spu_extract((vec_uint4) result.b, 0);
|
||||
spu.ctile.ui[iy+1][ix+1] = spu_extract((vec_uint4) result.a, 0);
|
||||
|
||||
#endif /* NEW_FRAGMENT_FUNCTION */
|
||||
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
|||
|
|
@ -349,12 +349,17 @@ create_xmesa_buffer(XMesaDrawable d, BufferType type,
|
|||
|
||||
if (vis->mesa_visual.depthBits == 0)
|
||||
depthFormat = PIPE_FORMAT_NONE;
|
||||
#ifdef GALLIUM_CELL /* XXX temporary for Cell! */
|
||||
else
|
||||
depthFormat = PIPE_FORMAT_S8Z24_UNORM;
|
||||
#else
|
||||
else if (vis->mesa_visual.depthBits <= 16)
|
||||
depthFormat = PIPE_FORMAT_Z16_UNORM;
|
||||
depthFormat = PIPE_FORMAT_Z16UNORM;
|
||||
else if (vis->mesa_visual.depthBits <= 24)
|
||||
depthFormat = PIPE_FORMAT_S8Z24_UNORM;
|
||||
else
|
||||
depthFormat = PIPE_FORMAT_Z32_UNORM;
|
||||
#endif
|
||||
|
||||
if (vis->mesa_visual.stencilBits == 8) {
|
||||
if (depthFormat == PIPE_FORMAT_S8Z24_UNORM)
|
||||
|
|
|
|||
|
|
@ -275,6 +275,39 @@ xm_buffer_destroy(struct pipe_winsys *pws,
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* For Cell. Basically, rearrange the pixels/quads from this layout:
|
||||
* +--+--+--+--+
|
||||
* |p0|p1|p2|p3|....
|
||||
* +--+--+--+--+
|
||||
*
|
||||
* to this layout:
|
||||
* +--+--+
|
||||
* |p0|p1|....
|
||||
* +--+--+
|
||||
* |p2|p3|
|
||||
* +--+--+
|
||||
*/
|
||||
static void
|
||||
twiddle_tile(uint *tile)
|
||||
{
|
||||
uint tile2[TILE_SIZE * TILE_SIZE];
|
||||
int y, x;
|
||||
|
||||
for (y = 0; y < TILE_SIZE; y+=2) {
|
||||
for (x = 0; x < TILE_SIZE; x+=2) {
|
||||
int k = 4 * (y/2 * TILE_SIZE/2 + x/2);
|
||||
tile2[y * TILE_SIZE + (x + 0)] = tile[k];
|
||||
tile2[y * TILE_SIZE + (x + 1)] = tile[k+1];
|
||||
tile2[(y + 1) * TILE_SIZE + (x + 0)] = tile[k+2];
|
||||
tile2[(y + 1) * TILE_SIZE + (x + 1)] = tile[k+3];
|
||||
}
|
||||
}
|
||||
memcpy(tile, tile2, sizeof(tile2));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Display a surface that's in a tiled configuration. That is, all the
|
||||
* pixels for a TILE_SIZExTILE_SIZE block are contiguous in memory.
|
||||
|
|
@ -321,6 +354,8 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf)
|
|||
|
||||
ximage->data = (char *) xm_buf->data + offset;
|
||||
|
||||
twiddle_tile((uint *) ximage->data);
|
||||
|
||||
if (XSHM_ENABLED(xm_buf)) {
|
||||
#if defined(USE_XSHM) && !defined(XFree86Server)
|
||||
XShmPutImage(b->xm_visual->display, b->drawable, b->gc,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue