mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 20:28:04 +02:00
CELL: changes to generate SPU code for stenciling
This set of code changes is for stencil code generation
support. Both one-sided and two-sided stenciling are supported.
In addition to the raw code generation changes, these changes had
to be made elsewhere in the system:
- Added new "register set" feature to the SPE assembly generation.
A "register set" is a way to allocate multiple registers and free
them all at the same time, delegating register allocation management
to the spe_function unit. It's quite useful in complex register
allocation schemes (like stenciling).
- Added and improved SPE macro calculations.
These are operations between registers and unsigned integer
immediates. In many cases, the calculation can be performed
with a single instruction; the macros will generate the
single instruction if possible, or generate a register load
and register-to-register operation if not. These macro
functions are: spe_load_uint() (which has new ways to
load a value in a single instruction), spe_and_uint(),
spe_xor_uint(), spe_compare_equal_uint(), and spe_compare_greater_uint().
- Added facing to fragment generation. While rendering, the rasterizer
needs to be able to determine front- and back-facing fragments, in order
to correctly apply two-sided stencil. That requires these changes:
- Added front_winding field to the cell_command_render block, so that
the state tracker could communicate to the rasterizer what it
considered to be the front-facing direction.
- Added fragment facing as an input to the fragment function.
- Calculated facing is passed during emit_quad().
This commit is contained in:
parent
22eb067c88
commit
afaa53040b
12 changed files with 1092 additions and 147 deletions
|
|
@ -359,14 +359,21 @@ void _name (struct spe_function *p, int imm) \
|
|||
*/
|
||||
void spe_init_func(struct spe_function *p, unsigned code_size)
|
||||
{
|
||||
register unsigned int i;
|
||||
|
||||
p->store = align_malloc(code_size, 16);
|
||||
p->num_inst = 0;
|
||||
p->max_inst = code_size / SPE_INST_SIZE;
|
||||
|
||||
p->set_count = 0;
|
||||
memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0]));
|
||||
|
||||
/* Conservatively treat R0 - R2 and R80 - R127 as non-volatile.
|
||||
*/
|
||||
p->regs[0] = ~7;
|
||||
p->regs[1] = (1U << (80 - 64)) - 1;
|
||||
p->regs[0] = p->regs[1] = p->regs[2] = 1;
|
||||
for (i = 80; i <= 127; i++) {
|
||||
p->regs[i] = 1;
|
||||
}
|
||||
|
||||
p->print = false;
|
||||
p->indent = 0;
|
||||
|
|
@ -398,12 +405,8 @@ int spe_allocate_available_register(struct spe_function *p)
|
|||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < SPE_NUM_REGS; i++) {
|
||||
const uint64_t mask = (1ULL << (i % 64));
|
||||
const unsigned idx = i / 64;
|
||||
|
||||
assert(idx < 2);
|
||||
if ((p->regs[idx] & mask) != 0) {
|
||||
p->regs[idx] &= ~mask;
|
||||
if (p->regs[i] == 0) {
|
||||
p->regs[i] = 1;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
|
@ -417,31 +420,68 @@ int spe_allocate_available_register(struct spe_function *p)
|
|||
*/
|
||||
int spe_allocate_register(struct spe_function *p, int reg)
|
||||
{
|
||||
const unsigned idx = reg / 64;
|
||||
const unsigned bit = reg % 64;
|
||||
|
||||
assert(reg < SPE_NUM_REGS);
|
||||
assert((p->regs[idx] & (1ULL << bit)) != 0);
|
||||
|
||||
p->regs[idx] &= ~(1ULL << bit);
|
||||
assert(p->regs[reg] == 0);
|
||||
p->regs[reg] = 1;
|
||||
return reg;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Mark the given SPE register as "unallocated".
|
||||
* Mark the given SPE register as "unallocated". Note that this should
|
||||
* only be used on registers allocated in the current register set; an
|
||||
* assertion will fail if an attempt is made to deallocate a register
|
||||
* allocated in an earlier register set.
|
||||
*/
|
||||
void spe_release_register(struct spe_function *p, int reg)
|
||||
{
|
||||
const unsigned idx = reg / 64;
|
||||
const unsigned bit = reg % 64;
|
||||
|
||||
assert(idx < 2);
|
||||
|
||||
assert(reg < SPE_NUM_REGS);
|
||||
assert((p->regs[idx] & (1ULL << bit)) == 0);
|
||||
assert(p->regs[reg] == 1);
|
||||
|
||||
p->regs[idx] |= (1ULL << bit);
|
||||
p->regs[reg] = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start a new set of registers. This can be called if
|
||||
* it will be difficult later to determine exactly what
|
||||
* registers were actually allocated during a code generation
|
||||
* sequence, and you really just want to deallocate all of them.
|
||||
*/
|
||||
void spe_allocate_register_set(struct spe_function *p)
|
||||
{
|
||||
register unsigned int i;
|
||||
|
||||
/* Keep track of the set count. If it ever wraps around to 0,
|
||||
* we're in trouble.
|
||||
*/
|
||||
p->set_count++;
|
||||
assert(p->set_count > 0);
|
||||
|
||||
/* Increment the allocation count of all registers currently
|
||||
* allocated. Then any registers that are allocated in this set
|
||||
* will be the only ones with a count of 1; they'll all be released
|
||||
* when the register set is released.
|
||||
*/
|
||||
for (i = 0; i < SPE_NUM_REGS; i++) {
|
||||
if (p->regs[i] > 0) p->regs[i]++;
|
||||
}
|
||||
}
|
||||
|
||||
void spe_release_register_set(struct spe_function *p)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
/* If the set count drops below zero, we're in trouble. */
|
||||
assert(p->set_count > 0);
|
||||
p->set_count--;
|
||||
|
||||
/* Drop the allocation level of all registers. Any allocated
|
||||
* during this register set will drop to 0 and then become
|
||||
* available.
|
||||
*/
|
||||
for (i = 0; i < SPE_NUM_REGS; i++) {
|
||||
if (p->regs[i] > 0) p->regs[i]--;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
/**
 * Emit the shortest available instruction sequence that loads the
 * unsigned constant ui into register rT.
 *
 * \param p   function receiving the generated instructions
 * \param rT  destination register
 * \param ui  constant to load
 */
void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui)
{
   /* If the whole value is in the lower 18 bits, use ila, which
    * doesn't sign-extend.  Otherwise, if the two halfwords of
    * the constant are identical, use ilh.  Otherwise, if every byte of
    * the desired value is 0x00 or 0xff, we can use Form Select Mask for
    * Bytes Immediate (fsmbi) to load the value in a single instruction.
    * Otherwise, in the general case, we have to use ilhu followed by iohl.
    */
   if ((ui & 0x0003ffff) == ui) {
      /* The mask must select the LOW 18 bits: the value is handed to ila
       * unchanged, so any bit above bit 17 would not fit the immediate.
       */
      spe_ila(p, rT, ui);
   }
   else if ((ui >> 16) == (ui & 0xffff)) {
      spe_ilh(p, rT, ui & 0xffff);
   }
   else if (
      ((ui & 0x000000ff) == 0 || (ui & 0x000000ff) == 0x000000ff) &&
      ((ui & 0x0000ff00) == 0 || (ui & 0x0000ff00) == 0x0000ff00) &&
      ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) &&
      ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000)
   ) {
      unsigned int mask = 0;
      /* fsmbi duplicates each bit in the given mask eight times,
       * using a 16-bit value to initialize a 16-byte quadword.
       * Each 4-bit nybble of the mask corresponds to a full word
       * of the result; look at the value and figure out the mask
       * (replicated for each word in the quadword), and then
       * form the "select mask" to get the value.
       */
      if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111;
      if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222;
      if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444;
      if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888;
      spe_fsmbi(p, rT, mask);
   }
   else {
      /* The general case: this usually uses two instructions, but
       * may use only one if the low-order 16 bits of each word are 0.
       */
      spe_ilhu(p, rT, ui >> 16);
      if (ui & 0xffff)
         spe_iohl(p, rT, ui & 0xffff);
   }
}
|
||||
|
||||
/* This function is constructed identically to spe_xor_uint() below.
|
||||
* Changes to one should be made in the other.
|
||||
*/
|
||||
/**
 * Emit code that ANDs register rA with the constant ui into rT.
 *
 * A single immediate-form instruction is emitted when ui can be expressed
 * that way: And Word Immediate (10-bit immediate sign-extended to 32 bits),
 * And Halfword Immediate (10-bit immediate sign-extended to 16 bits and
 * applied to each halfword), or And Byte Immediate (same constant in each
 * byte).  Otherwise ui is loaded into a temporary register first.
 *
 * \param p   function receiving the generated instructions
 * \param rT  destination register
 * \param rA  source register
 * \param ui  constant operand
 */
void spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui)
{
   const unsigned int word_high_bits = ui & 0xfffffe00;   /* upper 23 bits */
   const unsigned int half_high_bits = ui & 0xfe00fe00;   /* upper 7 bits of each halfword */
   const unsigned int low_byte = ui & 0x000000ff;

   if (word_high_bits == 0 || word_high_bits == 0xfffffe00) {
      /* Upper 23 bits are uniform, so sign extension reproduces ui. */
      spe_andi(p, rT, rA, ui & 0x000003ff);
   }
   else if ((half_high_bits == 0 || half_high_bits == 0xfe00fe00) &&
            (ui & 0x0000ffff) == (ui >> 16)) {
      /* Both halfwords match and each sign-extends from 10 bits. */
      spe_andhi(p, rT, rA, ui & 0x000003ff);
   }
   else if (ui == low_byte * 0x01010101u) {
      /* All four bytes of ui are the same value. */
      spe_andbi(p, rT, rA, low_byte);
   }
   else {
      /* No immediate form fits; go through a temporary register. */
      unsigned int scratch = spe_allocate_available_register(p);
      spe_load_uint(p, scratch, ui);
      spe_and(p, rT, rA, scratch);
      spe_release_register(p, scratch);
   }
}
|
||||
|
||||
/* This function is constructed identically to spe_and_uint() above.
|
||||
* Changes to one should be made in the other.
|
||||
*/
|
||||
/**
 * Emit code that XORs register rA with the constant ui into rT.
 *
 * A single immediate-form instruction is emitted when ui can be expressed
 * that way: Exclusive Or Word Immediate (10-bit immediate sign-extended to
 * 32 bits), Exclusive Or Halfword Immediate (10-bit immediate sign-extended
 * to 16 bits and applied to each halfword), or Exclusive Or Byte Immediate
 * (same constant in each byte).  Otherwise ui is loaded into a temporary
 * register first.
 *
 * \param p   function receiving the generated instructions
 * \param rT  destination register
 * \param rA  source register
 * \param ui  constant operand
 */
void spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui)
{
   const unsigned int word_high_bits = ui & 0xfffffe00;   /* upper 23 bits */
   const unsigned int half_high_bits = ui & 0xfe00fe00;   /* upper 7 bits of each halfword */
   const unsigned int low_byte = ui & 0x000000ff;

   if (word_high_bits == 0 || word_high_bits == 0xfffffe00) {
      /* Upper 23 bits are uniform, so sign extension reproduces ui. */
      spe_xori(p, rT, rA, ui & 0x000003ff);
   }
   else if ((half_high_bits == 0 || half_high_bits == 0xfe00fe00) &&
            (ui & 0x0000ffff) == (ui >> 16)) {
      /* Both halfwords match and each sign-extends from 10 bits. */
      spe_xorhi(p, rT, rA, ui & 0x000003ff);
   }
   else if (ui == low_byte * 0x01010101u) {
      /* All four bytes of ui are the same value. */
      spe_xorbi(p, rT, rA, low_byte);
   }
   else {
      /* No immediate form fits; go through a temporary register. */
      unsigned int scratch = spe_allocate_available_register(p);
      spe_load_uint(p, scratch, ui);
      spe_xor(p, rT, rA, scratch);
      spe_release_register(p, scratch);
   }
}
|
||||
|
||||
/**
 * Emit a compare-equal of register rA against the constant ui, with the
 * result going to rT.
 *
 * Constants that fit in 9 bits are encoded directly in a Compare Equal
 * Word Immediate instruction; anything larger is loaded into a temporary
 * register and compared register-to-register.
 *
 * \param p   function receiving the generated instructions
 * \param rT  destination register
 * \param rA  source register
 * \param ui  constant to compare against
 */
void
spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui)
{
   unsigned int scratch;

   /* Small enough for the immediate form? */
   if (ui <= 0x000001ff) {
      spe_ceqi(p, rT, rA, ui);
      return;
   }

   /* Too wide for an immediate: load the word first. */
   scratch = spe_allocate_available_register(p);
   spe_load_uint(p, scratch, ui);
   spe_ceq(p, rT, rA, scratch);
   spe_release_register(p, scratch);
}
|
||||
|
||||
/**
 * Emit a logical (unsigned) compare-greater-than of register rA against
 * the constant ui, with the result going to rT.
 *
 * \param p   function receiving the generated instructions
 * \param rT  destination register
 * \param rA  source register
 * \param ui  constant to compare against
 */
void
spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui)
{
   /* Compare Logical Greater Than Word Immediate sign-extends its 10-bit
    * immediate before doing the unsigned comparison, so only values that
    * fit in 9 bits (immediate sign bit clear) encode as themselves.  A
    * value in 0x200..0x3ff would be compared as 0xfffffe00..0xffffffff.
    * This is the same limit spe_compare_equal_uint() uses.
    */
   if ((ui & 0x000001ff) == ui) {
      spe_clgti(p, rT, rA, ui);
   }
   /* Otherwise, we're going to have to load a word first. */
   else {
      unsigned int tmp_reg = spe_allocate_available_register(p);
      spe_load_uint(p, tmp_reg, ui);
      spe_clgt(p, rT, rA, tmp_reg);
      spe_release_register(p, tmp_reg);
   }
}
|
||||
|
||||
void
|
||||
spe_splat(struct spe_function *p, unsigned rT, unsigned rA)
|
||||
|
|
|
|||
|
|
@ -53,17 +53,26 @@ struct spe_function
|
|||
uint num_inst;
|
||||
uint max_inst;
|
||||
|
||||
/**
|
||||
* Mask of used / unused registers
|
||||
*
|
||||
* Each set bit corresponds to an available register. Each cleared bit
|
||||
* corresponds to an allocated register.
|
||||
/**
|
||||
* The "set count" reflects the number of nested register sets
|
||||
* currently open. In the unlikely case that we exceed the count's range,
|
||||
* register allocation will start to be confused, which is critical
|
||||
* enough that we check for it.
|
||||
*/
|
||||
unsigned char set_count;
|
||||
|
||||
/**
|
||||
* Flags for used and unused registers. Each byte corresponds to a
|
||||
* register; a 0 in that byte means that the register is available.
|
||||
* A value of 1 means that the register was allocated in the current
|
||||
* register set. Any other value N means that the register was allocated
|
||||
* N register sets ago.
|
||||
*
|
||||
* \sa
|
||||
* spe_allocate_register, spe_allocate_available_register,
|
||||
* spe_release_register
|
||||
* spe_allocate_register_set, spe_release_register_set, spe_release_register,
|
||||
*/
|
||||
uint64_t regs[SPE_NUM_REGS / 64];
|
||||
unsigned char regs[SPE_NUM_REGS];
|
||||
|
||||
boolean print; /**< print/dump instructions as they're emitted? */
|
||||
int indent; /**< number of spaces to indent */
|
||||
|
|
@ -77,6 +86,8 @@ extern unsigned spe_code_size(const struct spe_function *p);
|
|||
extern int spe_allocate_available_register(struct spe_function *p);
|
||||
extern int spe_allocate_register(struct spe_function *p, int reg);
|
||||
extern void spe_release_register(struct spe_function *p, int reg);
|
||||
extern void spe_allocate_register_set(struct spe_function *p);
|
||||
extern void spe_release_register_set(struct spe_function *p);
|
||||
|
||||
extern void spe_print_code(struct spe_function *p, boolean enable);
|
||||
extern void spe_indent(struct spe_function *p, int spaces);
|
||||
|
|
@ -307,6 +318,22 @@ spe_load_int(struct spe_function *p, unsigned rT, int i);
|
|||
extern void
|
||||
spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui);
|
||||
|
||||
/** And immediate value into rT. */
|
||||
extern void
|
||||
spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui);
|
||||
|
||||
/** Xor immediate value into rT. */
|
||||
extern void
|
||||
spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui);
|
||||
|
||||
/** Compare equal with immediate value. */
|
||||
extern void
|
||||
spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui);
|
||||
|
||||
/** Compare greater with immediate value. */
|
||||
extern void
|
||||
spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui);
|
||||
|
||||
/** Replicate word 0 of rA across rT. */
|
||||
extern void
|
||||
spe_splat(struct spe_function *p, unsigned rT, unsigned rA);
|
||||
|
|
|
|||
|
|
@ -227,6 +227,7 @@ struct cell_command_render
|
|||
float xmin, ymin, xmax, ymax; /* XXX another dummy field */
|
||||
uint min_index;
|
||||
boolean inline_verts;
|
||||
uint front_winding; /* the rasterizer needs to be able to determine facing to apply front/back-facing stencil */
|
||||
};
|
||||
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -152,6 +152,7 @@ cell_flush_prim_buffer(struct cell_context *cell)
|
|||
struct cell_command_render *render = &cell_global.command[i].render;
|
||||
render->prim_type = PIPE_PRIM_TRIANGLES;
|
||||
render->num_verts = cell->prim_buffer.num_verts;
|
||||
render->front_winding = cell->rasterizer->front_winding;
|
||||
render->vertex_size = cell->vertex_info->size * 4;
|
||||
render->xmin = cell->prim_buffer.xmin;
|
||||
render->ymin = cell->prim_buffer.ymin;
|
||||
|
|
|
|||
|
|
@ -214,6 +214,7 @@ cell_vbuf_draw(struct vbuf_render *vbr,
|
|||
|
||||
render->opcode = CELL_CMD_RENDER;
|
||||
render->prim_type = cvbr->prim;
|
||||
render->front_winding = cell->rasterizer->front_winding;
|
||||
|
||||
render->num_indexes = nr_indices;
|
||||
render->min_index = min_index;
|
||||
|
|
|
|||
|
|
@ -73,7 +73,8 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y,
|
|||
vector float fragGreen,
|
||||
vector float fragBlue,
|
||||
vector float fragAlpha,
|
||||
vector unsigned int mask);
|
||||
vector unsigned int mask,
|
||||
uint facing);
|
||||
|
||||
/** Function for running fragment program */
|
||||
typedef void (*spu_fragment_program_func)(vector float *inputs,
|
||||
|
|
|
|||
|
|
@ -57,7 +57,8 @@ spu_fallback_fragment_ops(uint x, uint y,
|
|||
vector float fragG,
|
||||
vector float fragB,
|
||||
vector float fragA,
|
||||
vector unsigned int mask)
|
||||
vector unsigned int mask,
|
||||
uint facing)
|
||||
{
|
||||
vector float frag_aos[4];
|
||||
unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */
|
||||
|
|
@ -433,23 +434,23 @@ spu_fallback_fragment_ops(uint x, uint y,
|
|||
/* Form bitmask depending on color buffer format and colormask bits */
|
||||
switch (spu.fb.color_format) {
|
||||
case PIPE_FORMAT_A8R8G8B8_UNORM:
|
||||
if (spu.blend.colormask & (1<<0))
|
||||
if (spu.blend.colormask & PIPE_MASK_R)
|
||||
cmask |= 0x00ff0000; /* red */
|
||||
if (spu.blend.colormask & (1<<1))
|
||||
if (spu.blend.colormask & PIPE_MASK_G)
|
||||
cmask |= 0x0000ff00; /* green */
|
||||
if (spu.blend.colormask & (1<<2))
|
||||
if (spu.blend.colormask & PIPE_MASK_B)
|
||||
cmask |= 0x000000ff; /* blue */
|
||||
if (spu.blend.colormask & (1<<3))
|
||||
if (spu.blend.colormask & PIPE_MASK_A)
|
||||
cmask |= 0xff000000; /* alpha */
|
||||
break;
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
if (spu.blend.colormask & (1<<0))
|
||||
if (spu.blend.colormask & PIPE_MASK_R)
|
||||
cmask |= 0x0000ff00; /* red */
|
||||
if (spu.blend.colormask & (1<<1))
|
||||
if (spu.blend.colormask & PIPE_MASK_G)
|
||||
cmask |= 0x00ff0000; /* green */
|
||||
if (spu.blend.colormask & (1<<2))
|
||||
if (spu.blend.colormask & PIPE_MASK_B)
|
||||
cmask |= 0xff000000; /* blue */
|
||||
if (spu.blend.colormask & (1<<3))
|
||||
if (spu.blend.colormask & PIPE_MASK_A)
|
||||
cmask |= 0x000000ff; /* alpha */
|
||||
break;
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -38,7 +38,8 @@ spu_fallback_fragment_ops(uint x, uint y,
|
|||
vector float fragGreen,
|
||||
vector float fragBlue,
|
||||
vector float fragAlpha,
|
||||
vector unsigned int mask);
|
||||
vector unsigned int mask,
|
||||
uint facing);
|
||||
|
||||
|
||||
#endif /* SPU_PER_FRAGMENT_OP */
|
||||
|
|
|
|||
|
|
@ -279,7 +279,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
|
|||
v1 = (const float *) (vertices + indexes[j+1] * vertex_size);
|
||||
v2 = (const float *) (vertices + indexes[j+2] * vertex_size);
|
||||
|
||||
drawn += tri_draw(v0, v1, v2, tx, ty);
|
||||
drawn += tri_draw(v0, v1, v2, tx, ty, render->front_winding);
|
||||
}
|
||||
|
||||
//printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3);
|
||||
|
|
@ -297,5 +297,3 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
|
|||
printf("SPU %u: RENDER done\n",
|
||||
spu.init.id);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -118,6 +118,8 @@ struct setup_stage {
|
|||
|
||||
float oneoverarea;
|
||||
|
||||
uint facing;
|
||||
|
||||
uint tx, ty;
|
||||
|
||||
int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;
|
||||
|
|
@ -274,7 +276,7 @@ eval_z(float x, float y)
|
|||
* overall.
|
||||
*/
|
||||
static INLINE void
|
||||
emit_quad( int x, int y, mask_t mask )
|
||||
emit_quad( int x, int y, mask_t mask)
|
||||
{
|
||||
/* If any bits in mask are set... */
|
||||
if (spu_extract(spu_orx(mask), 0)) {
|
||||
|
|
@ -344,7 +346,8 @@ emit_quad( int x, int y, mask_t mask )
|
|||
fragZ,
|
||||
soa_frag[0], soa_frag[1],
|
||||
soa_frag[2], soa_frag[3],
|
||||
mask);
|
||||
mask,
|
||||
setup.facing);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -379,7 +382,8 @@ emit_quad( int x, int y, mask_t mask )
|
|||
outputs[0*4+1],
|
||||
outputs[0*4+2],
|
||||
outputs[0*4+3],
|
||||
mask);
|
||||
mask,
|
||||
setup.facing);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -483,7 +487,7 @@ static void flush_spans( void )
|
|||
*/
|
||||
for (x = block(minleft); x <= block(maxright); x += 2) {
|
||||
#if 1
|
||||
emit_quad( x, setup.span.y, calculate_mask( x ) );
|
||||
emit_quad( x, setup.span.y, calculate_mask( x ));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -902,13 +906,28 @@ static void subtriangle( struct edge *eleft,
|
|||
eright->sy += lines;
|
||||
}
|
||||
|
||||
/**
 * Compute the z component of the cross product of the two edge vectors
 * (v0 - v2) and (v1 - v2), using only the first two floats of each vertex.
 *
 * \param v0  first vertex (x at [0], y at [1])
 * \param v1  second vertex
 * \param v2  third vertex, used as the common origin of both edges
 * \return ex * fy - ey * fx for e = v0 - v2 and f = v1 - v2
 */
static float
determinant( const float *v0,
             const float *v1,
             const float *v2 )
{
   const float *origin = v2;

   /* Edge vectors measured from the shared origin vertex. */
   const float e[2] = { v0[0] - origin[0], v0[1] - origin[1] };
   const float f[2] = { v1[0] - origin[0], v1[1] - origin[1] };

   return e[0] * f[1] - e[1] * f[0];
}
|
||||
|
||||
|
||||
/**
|
||||
* Draw triangle into tile at (tx, ty) (tile coords)
|
||||
* The tile data should have already been fetched.
|
||||
*/
|
||||
boolean
|
||||
tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty)
|
||||
tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty, uint front_winding)
|
||||
{
|
||||
setup.tx = tx;
|
||||
setup.ty = ty;
|
||||
|
|
@ -919,6 +938,12 @@ tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty)
|
|||
setup.cliprect_maxx = (tx + 1) * TILE_SIZE;
|
||||
setup.cliprect_maxy = (ty + 1) * TILE_SIZE;
|
||||
|
||||
/* Before we sort vertices, determine the facing of the triangle,
|
||||
* which will be needed for front/back-face stencil application
|
||||
*/
|
||||
float det = determinant(v0, v1, v2);
|
||||
setup.facing = (det > 0.0) ^ (front_winding == PIPE_WINDING_CW);
|
||||
|
||||
if (!setup_sort_vertices((struct vertex_header *) v0,
|
||||
(struct vertex_header *) v1,
|
||||
(struct vertex_header *) v2)) {
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@
|
|||
|
||||
|
||||
extern boolean
|
||||
tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty);
|
||||
tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty, uint front_winding);
|
||||
|
||||
|
||||
#endif /* SPU_TRI_H */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue