r300/compiler: Fix nested flow control in r500 vertex shaders

This commit is contained in:
Tom Stellard 2011-09-20 21:05:55 -07:00 committed by Tom Stellard
parent 4a269a8dc0
commit b2df031a95
11 changed files with 438 additions and 178 deletions

View file

@ -46,6 +46,7 @@ C_SOURCES := \
compiler/radeon_optimize.c \
compiler/radeon_remove_constants.c \
compiler/radeon_rename_regs.c \
compiler/radeon_vert_fc.c \
compiler/radeon_variable.c \
compiler/r3xx_fragprog.c \
compiler/r300_fragprog.c \

View file

@ -28,17 +28,13 @@
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_program.h"
#include "radeon_program_alu.h"
#include "radeon_swizzle.h"
#include "radeon_emulate_branches.h"
#include "radeon_emulate_loops.h"
#include "radeon_remove_constants.h"
struct loop {
int BgnLoop;
};
/*
* Take an already-setup and valid source then swizzle it appropriately to
* obtain a constant ZERO or ONE source.
@ -359,140 +355,13 @@ static void ei_pow(struct r300_vertex_program_code *vp,
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
static void mark_write(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
unsigned int * writemasks = userdata;
if (file != RC_FILE_TEMPORARY)
return;
if (index >= R300_VS_MAX_TEMPS)
return;
writemasks[index] |= mask;
}
static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler)
{
return PVS_SRC_OPERAND(compiler->PredicateIndex,
t_swizzle(RC_SWIZZLE_ZERO),
t_swizzle(RC_SWIZZLE_ZERO),
t_swizzle(RC_SWIZZLE_ZERO),
t_swizzle(RC_SWIZZLE_W),
t_src_class(RC_FILE_TEMPORARY),
0);
}
static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler,
unsigned int hw_opcode, int is_math)
{
return PVS_OP_DST_OPERAND(hw_opcode,
is_math,
0,
compiler->PredicateIndex,
RC_MASK_W,
t_dst_class(RC_FILE_TEMPORARY));
}
static void ei_if(struct r300_vertex_program_compiler * compiler,
struct rc_instruction *rci,
unsigned int * inst,
unsigned int branch_depth)
{
unsigned int predicate_opcode;
int is_math = 0;
if (!compiler->Base.is_r500) {
rc_error(&compiler->Base,"Opcode IF not supported\n");
return;
}
/* Reserve a temporary to use as our predicate stack counter, if we
* don't already have one. */
if (!compiler->PredicateMask) {
unsigned int writemasks[RC_REGISTER_MAX_INDEX];
struct rc_instruction * inst;
unsigned int i;
memset(writemasks, 0, sizeof(writemasks));
for(inst = compiler->Base.Program.Instructions.Next;
inst != &compiler->Base.Program.Instructions;
inst = inst->Next) {
rc_for_all_writes_mask(inst, mark_write, writemasks);
}
for(i = 0; i < compiler->Base.max_temp_regs; i++) {
unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
/* Only the W component can be used fo the predicate
* stack counter. */
if (mask & RC_MASK_W) {
compiler->PredicateMask = RC_MASK_W;
compiler->PredicateIndex = i;
break;
}
}
if (i == compiler->Base.max_temp_regs) {
rc_error(&compiler->Base, "No free temporary to use for"
" predicate stack counter.\n");
return;
}
}
predicate_opcode =
branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ;
rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0));
if (branch_depth == 0) {
is_math = 1;
predicate_opcode = ME_PRED_SET_NEQ;
inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
inst[2] = 0;
} else {
predicate_opcode = VE_PRED_SET_NEQ_PUSH;
inst[1] = t_pred_src(compiler);
inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
}
inst[0] = t_pred_dst(compiler, predicate_opcode, is_math);
inst[3] = 0;
}
static void ei_else(struct r300_vertex_program_compiler * compiler,
unsigned int * inst)
{
if (!compiler->Base.is_r500) {
rc_error(&compiler->Base,"Opcode ELSE not supported\n");
return;
}
inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1);
inst[1] = t_pred_src(compiler);
inst[2] = 0;
inst[3] = 0;
}
static void ei_endif(struct r300_vertex_program_compiler *compiler,
unsigned int * inst)
{
if (!compiler->Base.is_r500) {
rc_error(&compiler->Base,"Opcode ENDIF not supported\n");
return;
}
inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
inst[1] = t_pred_src(compiler);
inst[2] = 0;
inst[3] = 0;
}
static void translate_vertex_program(struct radeon_compiler *c, void *user)
{
struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
struct rc_instruction *rci;
struct loop * loops = NULL;
int current_loop_depth = 0;
int loops_reserved = 0;
unsigned int branch_depth = 0;
unsigned loops[R500_PVS_MAX_LOOP_DEPTH];
unsigned loop_depth = 0;
compiler->code->pos_end = 0; /* Not supported yet */
compiler->code->length = 0;
@ -532,12 +401,9 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
@ -556,37 +422,27 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
case RC_OPCODE_BGNLOOP:
{
struct loop * l;
if ((!compiler->Base.is_r500
&& loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
|| loops_reserved >= R500_VS_MAX_FC_DEPTH) {
&& loop_depth >= R300_VS_MAX_LOOP_DEPTH)
|| loop_depth >= R500_PVS_MAX_LOOP_DEPTH) {
rc_error(&compiler->Base,
"Loops are nested too deep.");
return;
}
memory_pool_array_reserve(&compiler->Base.Pool,
struct loop, loops, current_loop_depth,
loops_reserved, 1);
l = &loops[current_loop_depth++];
memset(l , 0, sizeof(struct loop));
l->BgnLoop = (compiler->code->length / 4);
continue;
loops[loop_depth++] = ((compiler->code->length)/ 4) + 1;
break;
}
case RC_OPCODE_ENDLOOP:
{
struct loop * l;
unsigned int act_addr;
unsigned int last_addr;
unsigned int ret_addr;
assert(loops);
l = &loops[current_loop_depth - 1];
act_addr = l->BgnLoop - 1;
ret_addr = loops[--loop_depth];
act_addr = ret_addr - 1;
last_addr = (compiler->code->length / 4) - 1;
ret_addr = l->BgnLoop;
if (loops_reserved >= R300_VS_MAX_FC_OPS) {
if (loop_depth >= R300_VS_MAX_FC_OPS) {
rc_error(&compiler->Base,
"Too many flow control instructions.");
return;
@ -595,7 +451,7 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
compiler->code->fc_op_addrs.r500
[compiler->code->num_fc_ops].lw =
R500_PVS_FC_ACT_ADRS(act_addr)
| R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
| R500_PVS_FC_LOOP_CNT_JMP_INST(0x00ff)
;
compiler->code->fc_op_addrs.r500
[compiler->code->num_fc_ops].uw =
@ -618,26 +474,51 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
compiler->code->num_fc_ops);
compiler->code->num_fc_ops++;
current_loop_depth--;
continue;
break;
}
case RC_ME_PRED_SET_CLR:
ei_math1(compiler->code, ME_PRED_SET_CLR, vpi, inst);
break;
case RC_ME_PRED_SET_INV:
ei_math1(compiler->code, ME_PRED_SET_INV, vpi, inst);
break;
case RC_ME_PRED_SET_POP:
ei_math1(compiler->code, ME_PRED_SET_POP, vpi, inst);
break;
case RC_ME_PRED_SET_RESTORE:
ei_math1(compiler->code, ME_PRED_SET_RESTORE, vpi, inst);
break;
case RC_ME_PRED_SEQ:
ei_math1(compiler->code, ME_PRED_SET_EQ, vpi, inst);
break;
case RC_ME_PRED_SNEQ:
ei_math1(compiler->code, ME_PRED_SET_NEQ, vpi, inst);
break;
case RC_VE_PRED_SNEQ_PUSH:
ei_vector2(compiler->code, VE_PRED_SET_NEQ_PUSH,
vpi, inst);
break;
default:
rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name);
return;
}
/* Non-flow control instructions that are inside an if statement
* need to pay attention to the predicate bit. */
if (branch_depth
&& vpi->Opcode != RC_OPCODE_IF
&& vpi->Opcode != RC_OPCODE_ELSE
&& vpi->Opcode != RC_OPCODE_ENDIF) {
if (vpi->DstReg.Pred != RC_PRED_DISABLED) {
inst[0] |= (PVS_DST_PRED_ENABLE_MASK
<< PVS_DST_PRED_ENABLE_SHIFT);
inst[0] |= (PVS_DST_PRED_SENSE_MASK
if (vpi->DstReg.Pred == RC_PRED_SET) {
inst[0] |= (PVS_DST_PRED_SENSE_MASK
<< PVS_DST_PRED_SENSE_SHIFT);
}
}
/* Update the number of temporaries. */
@ -650,10 +531,6 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
vpi->SrcReg[i].Index >= compiler->code->num_temporaries)
compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1;
if (compiler->PredicateMask)
if (compiler->PredicateIndex >= compiler->code->num_temporaries)
compiler->code->num_temporaries = compiler->PredicateIndex + 1;
if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) {
rc_error(&compiler->Base, "Too many temporaries.\n");
return;
@ -1018,7 +895,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
struct radeon_compiler_pass vs_list[] = {
/* NAME DUMP PREDICATE FUNCTION PARAM */
{"add artificial outputs", 0, 1, rc_vs_add_artificial_outputs, NULL},
{"transform loops", 1, 1, rc_transform_loops, NULL},
{"emulate branches", 1, !is_r500, rc_emulate_branches, NULL},
{"emulate negative addressing", 1, 1, rc_emulate_negative_addressing, NULL},
{"native rewrite", 1, is_r500, rc_local_transform, alu_rewrite_r500},
@ -1030,6 +906,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
{"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts},
{"register allocation", 1, opt, allocate_temporary_registers, NULL},
{"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
{"lower control flow opcodes", 1, is_r500, rc_vert_fc, NULL},
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
{"machine code generation", 0, 1, translate_vertex_program, NULL},
{"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL},

View file

@ -190,16 +190,25 @@ void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user)
fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops);
for(i = 0; i < vs->num_fc_ops; i++) {
unsigned is_loop = 0;
switch((vs->fc_ops >> (i * 2)) & 0x3 ) {
case 0: fprintf(stderr, "NOP"); break;
case 1: fprintf(stderr, "JUMP"); break;
case 2: fprintf(stderr, "LOOP"); break;
case 2: fprintf(stderr, "LOOP"); is_loop = 1; break;
case 3: fprintf(stderr, "JSR"); break;
}
if (c->Base.is_r500) {
fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n",
fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x "
"loop data->0x%08x\n",
vs->fc_op_addrs.r500[i].uw,
vs->fc_op_addrs.r500[i].lw);
vs->fc_op_addrs.r500[i].lw,
vs->fc_loop_index[i]);
if (is_loop) {
fprintf(stderr, "Before = %u First = %u Last = %u\n",
vs->fc_op_addrs.r500[i].lw & 0xffff,
(vs->fc_op_addrs.r500[i].uw >> 16) & 0xffff,
vs->fc_op_addrs.r500[i].uw & 0xffff);
}
} else {
fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]);
}

View file

@ -40,6 +40,9 @@
#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4
/* The r500 maximum depth is not just for loops, but any combination of loops
* and subroutine jumps. */
#define R500_PVS_MAX_LOOP_DEPTH 8
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
@ -262,9 +265,6 @@ struct rX00_fragment_program_code {
#define R300_VS_MAX_TEMPS 32
/* This is the max for all chipsets (r300-r500) */
#define R300_VS_MAX_FC_OPS 16
/* The r500 maximum depth is not just for loops, but any combination of loops
* and subroutine jumps. */
#define R500_VS_MAX_FC_DEPTH 8
#define R300_VS_MAX_LOOP_DEPTH 1
#define VSF_MAX_INPUTS 32

View file

@ -137,11 +137,10 @@ struct r300_vertex_program_compiler {
void * UserData;
void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
int PredicateIndex;
unsigned int PredicateMask;
};
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
void rc_vert_fc(struct radeon_compiler *compiler, void *user);
void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user);
struct radeon_compiler_pass {

View file

@ -437,6 +437,78 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
{
.Opcode = RC_OPCODE_KILP,
.Name = "KILP",
},
{
.Opcode = RC_ME_PRED_SEQ,
.Name = "ME_PRED_SEQ",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_ME_PRED_SGT,
.Name = "ME_PRED_SGT",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_ME_PRED_SGE,
.Name = "ME_PRED_SGE",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_ME_PRED_SNEQ,
.Name = "ME_PRED_SNEQ",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_ME_PRED_SET_CLR,
.Name = "ME_PRED_SET_CLEAR",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_ME_PRED_SET_INV,
.Name = "ME_PRED_SET_INV",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_ME_PRED_SET_POP,
.Name = "ME_PRED_SET_POP",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_ME_PRED_SET_RESTORE,
.Name = "ME_PRED_SET_RESTORE",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_VE_PRED_SEQ_PUSH,
.Name = "VE_PRED_SEQ_PUSH",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_VE_PRED_SGT_PUSH,
.Name = "VE_PRED_SGT_PUSH",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_VE_PRED_SGE_PUSH,
.Name = "VE_PRED_SGE_PUSH",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_VE_PRED_SNEQ_PUSH,
.Name = "VE_PRED_SNEQ_PUSH",
.NumSrcRegs = 2,
.HasDstReg = 1
}
};

View file

@ -217,6 +217,21 @@ typedef enum {
/** Stop execution of the shader (GLSL discard) */
RC_OPCODE_KILP,
/* Vertex shader CF Instructions */
RC_ME_PRED_SEQ,
RC_ME_PRED_SGT,
RC_ME_PRED_SGE,
RC_ME_PRED_SNEQ,
RC_ME_PRED_SET_CLR,
RC_ME_PRED_SET_INV,
RC_ME_PRED_SET_POP,
RC_ME_PRED_SET_RESTORE,
RC_VE_PRED_SEQ_PUSH,
RC_VE_PRED_SGT_PUSH,
RC_VE_PRED_SGE_PUSH,
RC_VE_PRED_SNEQ_PUSH,
MAX_RC_OPCODE
} rc_opcode;

View file

@ -58,6 +58,7 @@ struct rc_dst_register {
unsigned int File:3;
unsigned int Index:RC_REGISTER_INDEX_BITS;
unsigned int WriteMask:4;
unsigned int Pred:2;
};
struct rc_presub_instruction {

View file

@ -203,4 +203,10 @@ static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
#define RC_SOURCE_RGB 0x1
#define RC_SOURCE_ALPHA 0x2
typedef enum {
RC_PRED_DISABLED,
RC_PRED_SET,
RC_PRED_INV
} rc_predicate_mode;
#endif /* RADEON_PROGRAM_CONSTANTS_H */

View file

@ -329,6 +329,12 @@ static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst,
fprintf(f, ")]");
}
if (inst->U.I.DstReg.Pred == RC_PRED_SET) {
fprintf(f, " PRED_SET");
} else if (inst->U.I.DstReg.Pred == RC_PRED_INV) {
fprintf(f, " PRED_INV");
}
fprintf(f, "\n");
}

View file

@ -0,0 +1,274 @@
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_program.h"
#include "radeon_program_constants.h"
/* Bookkeeping carried by rc_vert_fc() from one instruction to the next while
 * it lowers the control flow opcodes of a vertex program. */
struct vert_fc_state {
/* Compiler whose instruction list is being rewritten. */
struct radeon_compiler *C;
/* Number of IFs currently open: bumped on IF, dropped on ENDIF. */
unsigned BranchDepth;
/* Number of loops currently open: bumped on BGNLOOP, dropped on ENDLOOP. */
unsigned LoopDepth;
/* Only read by the nesting check in lower_bgnloop(); nothing in this file
 * writes it after the initial memset to zero. */
unsigned LoopsReserved;
/* Predicate register index saved by lower_bgnloop() at slot [LoopDepth] and
 * restored by lower_endloop() from slot [LoopDepth - 1]. */
int PredStack[R500_PVS_MAX_LOOP_DEPTH];
/* Index of the temporary currently used as predicate register, or -1 while
 * none has been reserved yet. */
int PredicateReg;
/* Set by lower_if() when the IF is immediately followed by a BRK; cleared
 * again at every ENDIF. */
unsigned InCFBreak;
};
/**
 * Make \p src read the current predicate register.
 *
 * The swizzle selects W in the last channel and leaves the other three
 * channels unused.  Only the file, index and swizzle of \p src are written;
 * every other field keeps whatever value it had before the call.
 */
static void build_pred_src(
	struct rc_src_register * src,
	struct vert_fc_state * fc_state)
{
	src->File = RC_FILE_TEMPORARY;
	src->Index = fc_state->PredicateReg;
	src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
					RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
}
/**
 * Make \p dst write the W component of the current predicate register.
 *
 * Only the file, index and write mask of \p dst are written; in particular
 * the Pred field is left untouched so callers can set it before or after.
 */
static void build_pred_dst(
	struct rc_dst_register * dst,
	struct vert_fc_state * fc_state)
{
	dst->File = RC_FILE_TEMPORARY;
	dst->Index = fc_state->PredicateReg;
	dst->WriteMask = RC_MASK_W;
}
/**
 * rc_for_all_writes_mask() callback that accumulates, per temporary register,
 * the union of all component masks written by the program.
 *
 * \p userdata is the array of write masks indexed by register index.  Writes
 * to files other than RC_FILE_TEMPORARY, and writes to indices at or above
 * R300_VS_MAX_TEMPS, are ignored.
 */
static void mark_write(void * userdata, struct rc_instruction * inst,
	rc_register_file file, unsigned int index, unsigned int mask)
{
	unsigned int * writemasks = userdata;

	if (file == RC_FILE_TEMPORARY && index < R300_VS_MAX_TEMPS) {
		writemasks[index] |= mask;
	}
}
/**
 * Pick a temporary register that no instruction in the program writes and
 * record it in fc_state->PredicateReg.
 *
 * \return 1 on success, -1 (after reporting an error through rc_error()) when
 * every temporary below max_temp_regs is written somewhere.
 */
static int reserve_predicate_reg(struct vert_fc_state * fc_state)
{
	unsigned int writemasks[RC_REGISTER_MAX_INDEX];
	struct rc_instruction * cur;
	int reg;

	/* Collect the components written to each temporary. */
	memset(writemasks, 0, sizeof(writemasks));
	cur = fc_state->C->Program.Instructions.Next;
	while (cur != &fc_state->C->Program.Instructions) {
		rc_for_all_writes_mask(cur, mark_write, writemasks);
		cur = cur->Next;
	}

	/* Most of the control flow instructions only write the W component
	 * of the predicate register, but the docs say that ME_PRED_SET_CLR
	 * and ME_PRED_SET_RESTORE write all components, so only a register
	 * with every component free is acceptable. */
	for (reg = 0; reg < fc_state->C->max_temp_regs; reg++) {
		if (writemasks[reg] == 0) {
			fc_state->PredicateReg = reg;
			return 1;
		}
	}

	rc_error(fc_state->C, "No free temporary to use for"
			" predicate stack counter.\n");
	return -1;
}
/**
 * Set up the predicate register for a loop that is about to be entered.
 *
 * One instruction is inserted immediately before \p inst (the BGNLOOP):
 *  - for an outermost loop that is not inside any IF, an ME_PRED_SEQ that
 *    initializes the predicate register (reserving one first if needed);
 *  - otherwise, an ADD that copies the enclosing predicate value into a
 *    freshly reserved predicate register, after saving the enclosing
 *    register index in PredStack so lower_endloop() can restore it.
 *
 * The caller increments fc_state->LoopDepth after this function returns, so
 * on entry LoopDepth is the number of loops already open and is also the
 * PredStack slot this loop will use.
 *
 * Errors are reported through rc_error(); the function then returns without
 * finishing the lowering.
 */
static void lower_bgnloop(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	struct rc_instruction * new_inst;

	/* Validate the nesting depth against the number of loops that are
	 * actually open.  This must happen before PredStack[LoopDepth] is
	 * written below, otherwise a too deeply nested shader would write
	 * past the end of PredStack.  It is also done before inserting
	 * anything so that this error path leaves the program untouched. */
	if ((!fc_state->C->is_r500
		&& fc_state->LoopDepth >= R300_VS_MAX_LOOP_DEPTH)
	     || fc_state->LoopDepth >= R500_PVS_MAX_LOOP_DEPTH) {
		rc_error(fc_state->C, "Loops are nested too deep.");
		return;
	}

	new_inst = rc_insert_new_instruction(fc_state->C, inst->Prev);

	if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
		if (fc_state->PredicateReg == -1) {
			if (reserve_predicate_reg(fc_state) == -1) {
				return;
			}
		}

		/* Initialize the predicate bit to true. */
		new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
		new_inst->U.I.SrcReg[0].Index = 0;
		new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
		new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
	} else {
		/* Remember the enclosing predicate register so that
		 * lower_endloop() can switch back to it. */
		fc_state->PredStack[fc_state->LoopDepth] =
						fc_state->PredicateReg;

		/* Copy the current predicate value to this loop's
		 * predicate register. */

		/* Use the old predicate value for src0.  This has to be
		 * built before the new register is reserved, because
		 * reserving overwrites fc_state->PredicateReg. */
		build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);

		/* Reserve this loop's predicate register */
		if (reserve_predicate_reg(fc_state) == -1) {
			return;
		}

		/* Copy the old predicate value to the new register
		 * (old + 0). */
		new_inst->U.I.Opcode = RC_OPCODE_ADD;
		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
		new_inst->U.I.SrcReg[1].Index = 0;
		new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
		new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
	}
}
/**
 * Rewrite a BRK in place as a write to the current predicate register.
 *
 * Inside exactly one loop the BRK becomes an RCP of a zero source that is
 * executed with inverted predication; at any other loop depth it becomes a
 * predicated ME_PRED_SET_CLR.
 */
static void lower_brk(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	/* Both variants target the predicate register. */
	build_pred_dst(&inst->U.I.DstReg, fc_state);

	if (fc_state->LoopDepth != 1) {
		inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
		inst->U.I.DstReg.Pred = RC_PRED_SET;
		return;
	}

	inst->U.I.Opcode = RC_OPCODE_RCP;
	inst->U.I.DstReg.Pred = RC_PRED_INV;
	inst->U.I.SrcReg[0].File = RC_FILE_NONE;
	inst->U.I.SrcReg[0].Index = 0;
	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
}
/**
 * Insert an ME_PRED_SET_RESTORE right after \p inst (the ENDLOOP) and switch
 * fc_state back to the predicate register of the enclosing scope.
 *
 * The new instruction writes the loop's own predicate register and reads the
 * enclosing one, so the destination must be built before PredicateReg is
 * restored and the source after.
 */
static void lower_endloop(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	struct rc_instruction * restore =
			rc_insert_new_instruction(fc_state->C, inst);
	const unsigned saved_slot = fc_state->LoopDepth - 1;

	restore->U.I.Opcode = RC_ME_PRED_SET_RESTORE;

	/* Destination: the register this loop has been using. */
	build_pred_dst(&restore->U.I.DstReg, fc_state);

	/* Source: the register that was current before the loop began. */
	fc_state->PredicateReg = fc_state->PredStack[saved_slot];
	build_pred_src(&restore->U.I.SrcReg[0], fc_state);
}
/**
 * Rewrite an IF in place as a predicate-setting instruction.
 *
 * A scalar ME_PRED_* opcode is used when the IF is not nested in anything, or
 * when it sits directly in a depth-one loop and only guards a BRK; every
 * other IF becomes VE_PRED_SNEQ_PUSH.  A predicate register is reserved on
 * first use.
 */
static void lower_if(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	int top_level;
	int break_in_single_loop;

	/* Reserve a temporary to use as our predicate stack counter, if we
	 * don't already have one. */
	if (fc_state->PredicateReg == -1) {
		/* A loop would already have defined the predicate register,
		 * so we cannot be inside one here. */
		assert(fc_state->LoopDepth == 0);

		if (reserve_predicate_reg(fc_state) == -1) {
			return;
		}
	}

	/* An IF whose first instruction is BRK is a conditional break. */
	if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) {
		fc_state->InCFBreak = 1;
	}

	top_level = fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0;
	break_in_single_loop =
		fc_state->LoopDepth == 1 && fc_state->InCFBreak;

	if (!top_level && !break_in_single_loop) {
		unsigned cond_swz;

		inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;

		/* The branch condition moves to src1 ... */
		inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0];
		cond_swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
		/* ... because VE_PRED_SNEQ_PUSH needs the branch condition
		 * to be in the w component ... */
		inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(
				RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
				RC_SWIZZLE_UNUSED, cond_swz);
		/* ... and src0 reads the predicate register. */
		build_pred_src(&inst->U.I.SrcReg[0], fc_state);
	} else if (fc_state->InCFBreak) {
		inst->U.I.Opcode = RC_ME_PRED_SEQ;
		inst->U.I.DstReg.Pred = RC_PRED_SET;
	} else {
		inst->U.I.Opcode = RC_ME_PRED_SNEQ;
	}

	build_pred_dst(&inst->U.I.DstReg, fc_state);
}
/**
 * Compiler pass: lower the generic control flow opcodes (BGNLOOP, BRK,
 * ENDLOOP, IF, ELSE, ENDIF) of a vertex program into predicate instructions,
 * and mark every other instruction found inside a branch or loop as
 * predicated.
 *
 * \param c     compiler whose program is rewritten in place
 * \param user  unused
 *
 * NOTE(review): the walk keeps going after a helper has reported an error
 * through rc_error(); consider bailing out once an error has been flagged.
 */
void rc_vert_fc(struct radeon_compiler *c, void *user)
{
	struct rc_instruction * inst;
	struct vert_fc_state fc_state;

	memset(&fc_state, 0, sizeof(fc_state));
	/* -1 means that no predicate register has been reserved yet. */
	fc_state.PredicateReg = -1;
	fc_state.C = c;

	for(inst = c->Program.Instructions.Next;
					inst != &c->Program.Instructions;
					inst = inst->Next) {

		switch (inst->U.I.Opcode) {

		case RC_OPCODE_BGNLOOP:
			lower_bgnloop(inst, &fc_state);
			fc_state.LoopDepth++;
			break;

		case RC_OPCODE_BRK:
			lower_brk(inst, &fc_state);
			break;

		case RC_OPCODE_ENDLOOP:
			/* An outermost loop that is not inside any branch
			 * has no enclosing predicate to restore. */
			if (fc_state.BranchDepth != 0
					|| fc_state.LoopDepth != 1) {
				lower_endloop(inst, &fc_state);
				/* Skip the PRED_RESTORE that was just
				 * inserted after the ENDLOOP.  This must
				 * only happen when it really was inserted:
				 * otherwise the instruction following the
				 * ENDLOOP would be skipped without being
				 * lowered, or the walk would step past the
				 * end of the list. */
				inst = inst->Next;
			}
			fc_state.LoopDepth--;
			break;

		case RC_OPCODE_IF:
			lower_if(inst, &fc_state);
			fc_state.BranchDepth++;
			break;

		case RC_OPCODE_ELSE:
			inst->U.I.Opcode = RC_ME_PRED_SET_INV;
			build_pred_dst(&inst->U.I.DstReg, &fc_state);
			build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
			break;

		case RC_OPCODE_ENDIF:
			if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) {
				/* The IF of a conditional break in a
				 * depth-one loop was lowered without a push,
				 * so there is nothing to pop: drop the
				 * ENDIF.  Step back first so the loop
				 * increment continues with the instruction
				 * that followed it. */
				struct rc_instruction * to_delete = inst;
				inst = inst->Prev;
				rc_remove_instruction(to_delete);
			} else {
				inst->U.I.Opcode = RC_ME_PRED_SET_POP;
				build_pred_dst(&inst->U.I.DstReg, &fc_state);
				build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
			}
			fc_state.InCFBreak = 0;
			fc_state.BranchDepth--;
			break;

		default:
			/* Ordinary instructions inside a branch or loop
			 * only execute when the predicate is set. */
			if (fc_state.BranchDepth || fc_state.LoopDepth) {
				inst->U.I.DstReg.Pred = RC_PRED_SET;
			}
			break;
		}
	}
}