mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 09:28:07 +02:00
r300/compiler: Fix nested flow control in r500 vertex shaders
This commit is contained in:
parent
4a269a8dc0
commit
b2df031a95
11 changed files with 438 additions and 178 deletions
|
|
@ -46,6 +46,7 @@ C_SOURCES := \
|
|||
compiler/radeon_optimize.c \
|
||||
compiler/radeon_remove_constants.c \
|
||||
compiler/radeon_rename_regs.c \
|
||||
compiler/radeon_vert_fc.c \
|
||||
compiler/radeon_variable.c \
|
||||
compiler/r3xx_fragprog.c \
|
||||
compiler/r300_fragprog.c \
|
||||
|
|
|
|||
|
|
@ -28,17 +28,13 @@
|
|||
|
||||
#include "radeon_compiler_util.h"
|
||||
#include "radeon_dataflow.h"
|
||||
#include "radeon_program.h"
|
||||
#include "radeon_program_alu.h"
|
||||
#include "radeon_swizzle.h"
|
||||
#include "radeon_emulate_branches.h"
|
||||
#include "radeon_emulate_loops.h"
|
||||
#include "radeon_remove_constants.h"
|
||||
|
||||
struct loop {
|
||||
int BgnLoop;
|
||||
|
||||
};
|
||||
|
||||
/*
|
||||
* Take an already-setup and valid source then swizzle it appropriately to
|
||||
* obtain a constant ZERO or ONE source.
|
||||
|
|
@ -359,140 +355,13 @@ static void ei_pow(struct r300_vertex_program_code *vp,
|
|||
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
|
||||
}
|
||||
|
||||
static void mark_write(void * userdata, struct rc_instruction * inst,
|
||||
rc_register_file file, unsigned int index, unsigned int mask)
|
||||
{
|
||||
unsigned int * writemasks = userdata;
|
||||
|
||||
if (file != RC_FILE_TEMPORARY)
|
||||
return;
|
||||
|
||||
if (index >= R300_VS_MAX_TEMPS)
|
||||
return;
|
||||
|
||||
writemasks[index] |= mask;
|
||||
}
|
||||
|
||||
static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler)
|
||||
{
|
||||
return PVS_SRC_OPERAND(compiler->PredicateIndex,
|
||||
t_swizzle(RC_SWIZZLE_ZERO),
|
||||
t_swizzle(RC_SWIZZLE_ZERO),
|
||||
t_swizzle(RC_SWIZZLE_ZERO),
|
||||
t_swizzle(RC_SWIZZLE_W),
|
||||
t_src_class(RC_FILE_TEMPORARY),
|
||||
0);
|
||||
}
|
||||
|
||||
static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler,
|
||||
unsigned int hw_opcode, int is_math)
|
||||
{
|
||||
return PVS_OP_DST_OPERAND(hw_opcode,
|
||||
is_math,
|
||||
0,
|
||||
compiler->PredicateIndex,
|
||||
RC_MASK_W,
|
||||
t_dst_class(RC_FILE_TEMPORARY));
|
||||
|
||||
}
|
||||
|
||||
static void ei_if(struct r300_vertex_program_compiler * compiler,
|
||||
struct rc_instruction *rci,
|
||||
unsigned int * inst,
|
||||
unsigned int branch_depth)
|
||||
{
|
||||
unsigned int predicate_opcode;
|
||||
int is_math = 0;
|
||||
|
||||
if (!compiler->Base.is_r500) {
|
||||
rc_error(&compiler->Base,"Opcode IF not supported\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Reserve a temporary to use as our predicate stack counter, if we
|
||||
* don't already have one. */
|
||||
if (!compiler->PredicateMask) {
|
||||
unsigned int writemasks[RC_REGISTER_MAX_INDEX];
|
||||
struct rc_instruction * inst;
|
||||
unsigned int i;
|
||||
memset(writemasks, 0, sizeof(writemasks));
|
||||
for(inst = compiler->Base.Program.Instructions.Next;
|
||||
inst != &compiler->Base.Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
rc_for_all_writes_mask(inst, mark_write, writemasks);
|
||||
}
|
||||
for(i = 0; i < compiler->Base.max_temp_regs; i++) {
|
||||
unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
|
||||
/* Only the W component can be used fo the predicate
|
||||
* stack counter. */
|
||||
if (mask & RC_MASK_W) {
|
||||
compiler->PredicateMask = RC_MASK_W;
|
||||
compiler->PredicateIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == compiler->Base.max_temp_regs) {
|
||||
rc_error(&compiler->Base, "No free temporary to use for"
|
||||
" predicate stack counter.\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
predicate_opcode =
|
||||
branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ;
|
||||
|
||||
rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0));
|
||||
if (branch_depth == 0) {
|
||||
is_math = 1;
|
||||
predicate_opcode = ME_PRED_SET_NEQ;
|
||||
inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
|
||||
inst[2] = 0;
|
||||
} else {
|
||||
predicate_opcode = VE_PRED_SET_NEQ_PUSH;
|
||||
inst[1] = t_pred_src(compiler);
|
||||
inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
|
||||
}
|
||||
|
||||
inst[0] = t_pred_dst(compiler, predicate_opcode, is_math);
|
||||
inst[3] = 0;
|
||||
|
||||
}
|
||||
|
||||
static void ei_else(struct r300_vertex_program_compiler * compiler,
|
||||
unsigned int * inst)
|
||||
{
|
||||
if (!compiler->Base.is_r500) {
|
||||
rc_error(&compiler->Base,"Opcode ELSE not supported\n");
|
||||
return;
|
||||
}
|
||||
inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1);
|
||||
inst[1] = t_pred_src(compiler);
|
||||
inst[2] = 0;
|
||||
inst[3] = 0;
|
||||
}
|
||||
|
||||
static void ei_endif(struct r300_vertex_program_compiler *compiler,
|
||||
unsigned int * inst)
|
||||
{
|
||||
if (!compiler->Base.is_r500) {
|
||||
rc_error(&compiler->Base,"Opcode ENDIF not supported\n");
|
||||
return;
|
||||
}
|
||||
inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
|
||||
inst[1] = t_pred_src(compiler);
|
||||
inst[2] = 0;
|
||||
inst[3] = 0;
|
||||
}
|
||||
|
||||
static void translate_vertex_program(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
|
||||
struct rc_instruction *rci;
|
||||
|
||||
struct loop * loops = NULL;
|
||||
int current_loop_depth = 0;
|
||||
int loops_reserved = 0;
|
||||
|
||||
unsigned int branch_depth = 0;
|
||||
unsigned loops[R500_PVS_MAX_LOOP_DEPTH];
|
||||
unsigned loop_depth = 0;
|
||||
|
||||
compiler->code->pos_end = 0; /* Not supported yet */
|
||||
compiler->code->length = 0;
|
||||
|
|
@ -532,12 +401,9 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
|
|||
case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
|
||||
case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
|
||||
case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
|
||||
case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
|
||||
case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
|
||||
case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
|
||||
case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
|
||||
case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
|
||||
case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
|
||||
case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
|
||||
case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
|
||||
case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
|
||||
|
|
@ -556,37 +422,27 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
|
|||
case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
|
||||
case RC_OPCODE_BGNLOOP:
|
||||
{
|
||||
struct loop * l;
|
||||
|
||||
if ((!compiler->Base.is_r500
|
||||
&& loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
|
||||
|| loops_reserved >= R500_VS_MAX_FC_DEPTH) {
|
||||
&& loop_depth >= R300_VS_MAX_LOOP_DEPTH)
|
||||
|| loop_depth >= R500_PVS_MAX_LOOP_DEPTH) {
|
||||
rc_error(&compiler->Base,
|
||||
"Loops are nested too deep.");
|
||||
return;
|
||||
}
|
||||
memory_pool_array_reserve(&compiler->Base.Pool,
|
||||
struct loop, loops, current_loop_depth,
|
||||
loops_reserved, 1);
|
||||
l = &loops[current_loop_depth++];
|
||||
memset(l , 0, sizeof(struct loop));
|
||||
l->BgnLoop = (compiler->code->length / 4);
|
||||
continue;
|
||||
loops[loop_depth++] = ((compiler->code->length)/ 4) + 1;
|
||||
break;
|
||||
}
|
||||
case RC_OPCODE_ENDLOOP:
|
||||
{
|
||||
struct loop * l;
|
||||
unsigned int act_addr;
|
||||
unsigned int last_addr;
|
||||
unsigned int ret_addr;
|
||||
|
||||
assert(loops);
|
||||
l = &loops[current_loop_depth - 1];
|
||||
act_addr = l->BgnLoop - 1;
|
||||
ret_addr = loops[--loop_depth];
|
||||
act_addr = ret_addr - 1;
|
||||
last_addr = (compiler->code->length / 4) - 1;
|
||||
ret_addr = l->BgnLoop;
|
||||
|
||||
if (loops_reserved >= R300_VS_MAX_FC_OPS) {
|
||||
if (loop_depth >= R300_VS_MAX_FC_OPS) {
|
||||
rc_error(&compiler->Base,
|
||||
"Too many flow control instructions.");
|
||||
return;
|
||||
|
|
@ -595,7 +451,7 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
|
|||
compiler->code->fc_op_addrs.r500
|
||||
[compiler->code->num_fc_ops].lw =
|
||||
R500_PVS_FC_ACT_ADRS(act_addr)
|
||||
| R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
|
||||
| R500_PVS_FC_LOOP_CNT_JMP_INST(0x00ff)
|
||||
;
|
||||
compiler->code->fc_op_addrs.r500
|
||||
[compiler->code->num_fc_ops].uw =
|
||||
|
|
@ -618,26 +474,51 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
|
|||
compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
|
||||
compiler->code->num_fc_ops);
|
||||
compiler->code->num_fc_ops++;
|
||||
current_loop_depth--;
|
||||
continue;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case RC_ME_PRED_SET_CLR:
|
||||
ei_math1(compiler->code, ME_PRED_SET_CLR, vpi, inst);
|
||||
break;
|
||||
|
||||
case RC_ME_PRED_SET_INV:
|
||||
ei_math1(compiler->code, ME_PRED_SET_INV, vpi, inst);
|
||||
break;
|
||||
|
||||
case RC_ME_PRED_SET_POP:
|
||||
ei_math1(compiler->code, ME_PRED_SET_POP, vpi, inst);
|
||||
break;
|
||||
|
||||
case RC_ME_PRED_SET_RESTORE:
|
||||
ei_math1(compiler->code, ME_PRED_SET_RESTORE, vpi, inst);
|
||||
break;
|
||||
|
||||
case RC_ME_PRED_SEQ:
|
||||
ei_math1(compiler->code, ME_PRED_SET_EQ, vpi, inst);
|
||||
break;
|
||||
|
||||
case RC_ME_PRED_SNEQ:
|
||||
ei_math1(compiler->code, ME_PRED_SET_NEQ, vpi, inst);
|
||||
break;
|
||||
|
||||
case RC_VE_PRED_SNEQ_PUSH:
|
||||
ei_vector2(compiler->code, VE_PRED_SET_NEQ_PUSH,
|
||||
vpi, inst);
|
||||
break;
|
||||
|
||||
default:
|
||||
rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Non-flow control instructions that are inside an if statement
|
||||
* need to pay attention to the predicate bit. */
|
||||
if (branch_depth
|
||||
&& vpi->Opcode != RC_OPCODE_IF
|
||||
&& vpi->Opcode != RC_OPCODE_ELSE
|
||||
&& vpi->Opcode != RC_OPCODE_ENDIF) {
|
||||
|
||||
if (vpi->DstReg.Pred != RC_PRED_DISABLED) {
|
||||
inst[0] |= (PVS_DST_PRED_ENABLE_MASK
|
||||
<< PVS_DST_PRED_ENABLE_SHIFT);
|
||||
inst[0] |= (PVS_DST_PRED_SENSE_MASK
|
||||
if (vpi->DstReg.Pred == RC_PRED_SET) {
|
||||
inst[0] |= (PVS_DST_PRED_SENSE_MASK
|
||||
<< PVS_DST_PRED_SENSE_SHIFT);
|
||||
}
|
||||
}
|
||||
|
||||
/* Update the number of temporaries. */
|
||||
|
|
@ -650,10 +531,6 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
|
|||
vpi->SrcReg[i].Index >= compiler->code->num_temporaries)
|
||||
compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1;
|
||||
|
||||
if (compiler->PredicateMask)
|
||||
if (compiler->PredicateIndex >= compiler->code->num_temporaries)
|
||||
compiler->code->num_temporaries = compiler->PredicateIndex + 1;
|
||||
|
||||
if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) {
|
||||
rc_error(&compiler->Base, "Too many temporaries.\n");
|
||||
return;
|
||||
|
|
@ -1018,7 +895,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
|
|||
struct radeon_compiler_pass vs_list[] = {
|
||||
/* NAME DUMP PREDICATE FUNCTION PARAM */
|
||||
{"add artificial outputs", 0, 1, rc_vs_add_artificial_outputs, NULL},
|
||||
{"transform loops", 1, 1, rc_transform_loops, NULL},
|
||||
{"emulate branches", 1, !is_r500, rc_emulate_branches, NULL},
|
||||
{"emulate negative addressing", 1, 1, rc_emulate_negative_addressing, NULL},
|
||||
{"native rewrite", 1, is_r500, rc_local_transform, alu_rewrite_r500},
|
||||
|
|
@ -1030,6 +906,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
|
|||
{"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts},
|
||||
{"register allocation", 1, opt, allocate_temporary_registers, NULL},
|
||||
{"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
|
||||
{"lower control flow opcodes", 1, is_r500, rc_vert_fc, NULL},
|
||||
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
|
||||
{"machine code generation", 0, 1, translate_vertex_program, NULL},
|
||||
{"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL},
|
||||
|
|
|
|||
|
|
@ -190,16 +190,25 @@ void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user)
|
|||
|
||||
fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops);
|
||||
for(i = 0; i < vs->num_fc_ops; i++) {
|
||||
unsigned is_loop = 0;
|
||||
switch((vs->fc_ops >> (i * 2)) & 0x3 ) {
|
||||
case 0: fprintf(stderr, "NOP"); break;
|
||||
case 1: fprintf(stderr, "JUMP"); break;
|
||||
case 2: fprintf(stderr, "LOOP"); break;
|
||||
case 2: fprintf(stderr, "LOOP"); is_loop = 1; break;
|
||||
case 3: fprintf(stderr, "JSR"); break;
|
||||
}
|
||||
if (c->Base.is_r500) {
|
||||
fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n",
|
||||
fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x "
|
||||
"loop data->0x%08x\n",
|
||||
vs->fc_op_addrs.r500[i].uw,
|
||||
vs->fc_op_addrs.r500[i].lw);
|
||||
vs->fc_op_addrs.r500[i].lw,
|
||||
vs->fc_loop_index[i]);
|
||||
if (is_loop) {
|
||||
fprintf(stderr, "Before = %u First = %u Last = %u\n",
|
||||
vs->fc_op_addrs.r500[i].lw & 0xffff,
|
||||
(vs->fc_op_addrs.r500[i].uw >> 16) & 0xffff,
|
||||
vs->fc_op_addrs.r500[i].uw & 0xffff);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,6 +40,9 @@
|
|||
#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
|
||||
#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4
|
||||
|
||||
/* The r500 maximum depth is not just for loops, but any combination of loops
|
||||
* and subroutine jumps. */
|
||||
#define R500_PVS_MAX_LOOP_DEPTH 8
|
||||
|
||||
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
|
||||
|
||||
|
|
@ -262,9 +265,6 @@ struct rX00_fragment_program_code {
|
|||
#define R300_VS_MAX_TEMPS 32
|
||||
/* This is the max for all chipsets (r300-r500) */
|
||||
#define R300_VS_MAX_FC_OPS 16
|
||||
/* The r500 maximum depth is not just for loops, but any combination of loops
|
||||
* and subroutine jumps. */
|
||||
#define R500_VS_MAX_FC_DEPTH 8
|
||||
#define R300_VS_MAX_LOOP_DEPTH 1
|
||||
|
||||
#define VSF_MAX_INPUTS 32
|
||||
|
|
|
|||
|
|
@ -137,11 +137,10 @@ struct r300_vertex_program_compiler {
|
|||
void * UserData;
|
||||
void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
|
||||
|
||||
int PredicateIndex;
|
||||
unsigned int PredicateMask;
|
||||
};
|
||||
|
||||
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
|
||||
void rc_vert_fc(struct radeon_compiler *compiler, void *user);
|
||||
void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user);
|
||||
|
||||
struct radeon_compiler_pass {
|
||||
|
|
|
|||
|
|
@ -437,6 +437,78 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
|
|||
{
|
||||
.Opcode = RC_OPCODE_KILP,
|
||||
.Name = "KILP",
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SEQ,
|
||||
.Name = "ME_PRED_SEQ",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SGT,
|
||||
.Name = "ME_PRED_SGT",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SGE,
|
||||
.Name = "ME_PRED_SGE",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SNEQ,
|
||||
.Name = "ME_PRED_SNEQ",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SET_CLR,
|
||||
.Name = "ME_PRED_SET_CLEAR",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SET_INV,
|
||||
.Name = "ME_PRED_SET_INV",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SET_POP,
|
||||
.Name = "ME_PRED_SET_POP",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SET_RESTORE,
|
||||
.Name = "ME_PRED_SET_RESTORE",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_VE_PRED_SEQ_PUSH,
|
||||
.Name = "VE_PRED_SEQ_PUSH",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_VE_PRED_SGT_PUSH,
|
||||
.Name = "VE_PRED_SGT_PUSH",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_VE_PRED_SGE_PUSH,
|
||||
.Name = "VE_PRED_SGE_PUSH",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_VE_PRED_SNEQ_PUSH,
|
||||
.Name = "VE_PRED_SNEQ_PUSH",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -217,6 +217,21 @@ typedef enum {
|
|||
/** Stop execution of the shader (GLSL discard) */
|
||||
RC_OPCODE_KILP,
|
||||
|
||||
/* Vertex shader CF Instructions */
|
||||
RC_ME_PRED_SEQ,
|
||||
RC_ME_PRED_SGT,
|
||||
RC_ME_PRED_SGE,
|
||||
RC_ME_PRED_SNEQ,
|
||||
RC_ME_PRED_SET_CLR,
|
||||
RC_ME_PRED_SET_INV,
|
||||
RC_ME_PRED_SET_POP,
|
||||
RC_ME_PRED_SET_RESTORE,
|
||||
|
||||
RC_VE_PRED_SEQ_PUSH,
|
||||
RC_VE_PRED_SGT_PUSH,
|
||||
RC_VE_PRED_SGE_PUSH,
|
||||
RC_VE_PRED_SNEQ_PUSH,
|
||||
|
||||
MAX_RC_OPCODE
|
||||
} rc_opcode;
|
||||
|
||||
|
|
|
|||
|
|
@ -58,6 +58,7 @@ struct rc_dst_register {
|
|||
unsigned int File:3;
|
||||
unsigned int Index:RC_REGISTER_INDEX_BITS;
|
||||
unsigned int WriteMask:4;
|
||||
unsigned int Pred:2;
|
||||
};
|
||||
|
||||
struct rc_presub_instruction {
|
||||
|
|
|
|||
|
|
@ -203,4 +203,10 @@ static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
|
|||
#define RC_SOURCE_RGB 0x1
|
||||
#define RC_SOURCE_ALPHA 0x2
|
||||
|
||||
typedef enum {
|
||||
RC_PRED_DISABLED,
|
||||
RC_PRED_SET,
|
||||
RC_PRED_INV
|
||||
} rc_predicate_mode;
|
||||
|
||||
#endif /* RADEON_PROGRAM_CONSTANTS_H */
|
||||
|
|
|
|||
|
|
@ -329,6 +329,12 @@ static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst,
|
|||
fprintf(f, ")]");
|
||||
}
|
||||
|
||||
if (inst->U.I.DstReg.Pred == RC_PRED_SET) {
|
||||
fprintf(f, " PRED_SET");
|
||||
} else if (inst->U.I.DstReg.Pred == RC_PRED_INV) {
|
||||
fprintf(f, " PRED_INV");
|
||||
}
|
||||
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
|
||||
|
|
|
|||
274
src/gallium/drivers/r300/compiler/radeon_vert_fc.c
Normal file
274
src/gallium/drivers/r300/compiler/radeon_vert_fc.c
Normal file
|
|
@ -0,0 +1,274 @@
|
|||
|
||||
#include "radeon_compiler.h"
|
||||
#include "radeon_compiler_util.h"
|
||||
#include "radeon_dataflow.h"
|
||||
#include "radeon_program.h"
|
||||
#include "radeon_program_constants.h"
|
||||
|
||||
struct vert_fc_state {
|
||||
struct radeon_compiler *C;
|
||||
unsigned BranchDepth;
|
||||
unsigned LoopDepth;
|
||||
unsigned LoopsReserved;
|
||||
int PredStack[R500_PVS_MAX_LOOP_DEPTH];
|
||||
int PredicateReg;
|
||||
unsigned InCFBreak;
|
||||
};
|
||||
|
||||
static void build_pred_src(
|
||||
struct rc_src_register * src,
|
||||
struct vert_fc_state * fc_state)
|
||||
{
|
||||
src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
|
||||
RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
|
||||
src->File = RC_FILE_TEMPORARY;
|
||||
src->Index = fc_state->PredicateReg;
|
||||
}
|
||||
|
||||
static void build_pred_dst(
|
||||
struct rc_dst_register * dst,
|
||||
struct vert_fc_state * fc_state)
|
||||
{
|
||||
dst->WriteMask = RC_MASK_W;
|
||||
dst->File = RC_FILE_TEMPORARY;
|
||||
dst->Index = fc_state->PredicateReg;
|
||||
}
|
||||
|
||||
static void mark_write(void * userdata, struct rc_instruction * inst,
|
||||
rc_register_file file, unsigned int index, unsigned int mask)
|
||||
{
|
||||
unsigned int * writemasks = userdata;
|
||||
|
||||
if (file != RC_FILE_TEMPORARY)
|
||||
return;
|
||||
|
||||
if (index >= R300_VS_MAX_TEMPS)
|
||||
return;
|
||||
|
||||
writemasks[index] |= mask;
|
||||
}
|
||||
|
||||
static int reserve_predicate_reg(struct vert_fc_state * fc_state)
|
||||
{
|
||||
int i;
|
||||
unsigned int writemasks[RC_REGISTER_MAX_INDEX];
|
||||
struct rc_instruction * inst;
|
||||
memset(writemasks, 0, sizeof(writemasks));
|
||||
for(inst = fc_state->C->Program.Instructions.Next;
|
||||
inst != &fc_state->C->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
rc_for_all_writes_mask(inst, mark_write, writemasks);
|
||||
}
|
||||
|
||||
for(i = 0; i < fc_state->C->max_temp_regs; i++) {
|
||||
/* Most of the control flow instructions only write the
|
||||
* W component of the Predicate Register, but
|
||||
* the docs say that ME_PRED_SET_CLR and
|
||||
* ME_PRED_SET_RESTORE write all components of the
|
||||
* register, so we must reserve a register that has
|
||||
* all its components free. */
|
||||
if (!writemasks[i]) {
|
||||
fc_state->PredicateReg = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == fc_state->C->max_temp_regs) {
|
||||
rc_error(fc_state->C, "No free temporary to use for"
|
||||
" predicate stack counter.\n");
|
||||
return -1;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void lower_bgnloop(
|
||||
struct rc_instruction * inst,
|
||||
struct vert_fc_state * fc_state)
|
||||
{
|
||||
struct rc_instruction * new_inst =
|
||||
rc_insert_new_instruction(fc_state->C, inst->Prev);
|
||||
|
||||
if ((!fc_state->C->is_r500
|
||||
&& fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH)
|
||||
|| fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
|
||||
rc_error(fc_state->C, "Loops are nested too deep.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
|
||||
if (fc_state->PredicateReg == -1) {
|
||||
if (reserve_predicate_reg(fc_state) == -1) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize the predicate bit to true. */
|
||||
new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
|
||||
build_pred_dst(&new_inst->U.I.DstReg, fc_state);
|
||||
new_inst->U.I.SrcReg[0].Index = 0;
|
||||
new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
|
||||
new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
|
||||
} else {
|
||||
fc_state->PredStack[fc_state->LoopDepth] =
|
||||
fc_state->PredicateReg;
|
||||
/* Copy the the current predicate value to this loop's
|
||||
* predicate register */
|
||||
|
||||
/* Use the old predicate value for src0 */
|
||||
build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
|
||||
|
||||
/* Reserve this loop's predicate register */
|
||||
if (reserve_predicate_reg(fc_state) == -1) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Copy the old predicate value to the new register */
|
||||
new_inst->U.I.Opcode = RC_OPCODE_ADD;
|
||||
build_pred_dst(&new_inst->U.I.DstReg, fc_state);
|
||||
new_inst->U.I.SrcReg[1].Index = 0;
|
||||
new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
|
||||
new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void lower_brk(
|
||||
struct rc_instruction * inst,
|
||||
struct vert_fc_state * fc_state)
|
||||
{
|
||||
if (fc_state->LoopDepth == 1) {
|
||||
inst->U.I.Opcode = RC_OPCODE_RCP;
|
||||
inst->U.I.DstReg.Pred = RC_PRED_INV;
|
||||
inst->U.I.SrcReg[0].Index = 0;
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_NONE;
|
||||
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
|
||||
} else {
|
||||
inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
|
||||
inst->U.I.DstReg.Pred = RC_PRED_SET;
|
||||
}
|
||||
|
||||
build_pred_dst(&inst->U.I.DstReg, fc_state);
|
||||
}
|
||||
|
||||
static void lower_endloop(
|
||||
struct rc_instruction * inst,
|
||||
struct vert_fc_state * fc_state)
|
||||
{
|
||||
struct rc_instruction * new_inst =
|
||||
rc_insert_new_instruction(fc_state->C, inst);
|
||||
|
||||
new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
|
||||
build_pred_dst(&new_inst->U.I.DstReg, fc_state);
|
||||
/* Restore the previous predicate register. */
|
||||
fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
|
||||
build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
|
||||
}
|
||||
|
||||
static void lower_if(
|
||||
struct rc_instruction * inst,
|
||||
struct vert_fc_state * fc_state)
|
||||
{
|
||||
/* Reserve a temporary to use as our predicate stack counter, if we
|
||||
* don't already have one. */
|
||||
if (fc_state->PredicateReg == -1) {
|
||||
/* If we are inside a loop, the Predicate Register should
|
||||
* have already been defined. */
|
||||
assert(fc_state->LoopDepth == 0);
|
||||
|
||||
if (reserve_predicate_reg(fc_state) == -1) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) {
|
||||
fc_state->InCFBreak = 1;
|
||||
}
|
||||
if ((fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0)
|
||||
|| (fc_state->LoopDepth == 1 && fc_state->InCFBreak)) {
|
||||
if (fc_state->InCFBreak) {
|
||||
inst->U.I.Opcode = RC_ME_PRED_SEQ;
|
||||
inst->U.I.DstReg.Pred = RC_PRED_SET;
|
||||
} else {
|
||||
inst->U.I.Opcode = RC_ME_PRED_SNEQ;
|
||||
}
|
||||
} else {
|
||||
unsigned swz;
|
||||
inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
|
||||
memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0],
|
||||
sizeof(inst->U.I.SrcReg[1]));
|
||||
swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
|
||||
/* VE_PRED_SNEQ_PUSH needs to the branch condition to be in the
|
||||
* w component */
|
||||
inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED,
|
||||
RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz);
|
||||
build_pred_src(&inst->U.I.SrcReg[0], fc_state);
|
||||
}
|
||||
build_pred_dst(&inst->U.I.DstReg, fc_state);
|
||||
}
|
||||
|
||||
void rc_vert_fc(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
struct rc_instruction * inst;
|
||||
struct vert_fc_state fc_state;
|
||||
|
||||
memset(&fc_state, 0, sizeof(fc_state));
|
||||
fc_state.PredicateReg = -1;
|
||||
fc_state.C = c;
|
||||
|
||||
for(inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
|
||||
switch (inst->U.I.Opcode) {
|
||||
|
||||
case RC_OPCODE_BGNLOOP:
|
||||
lower_bgnloop(inst, &fc_state);
|
||||
fc_state.LoopDepth++;
|
||||
break;
|
||||
|
||||
case RC_OPCODE_BRK:
|
||||
lower_brk(inst, &fc_state);
|
||||
break;
|
||||
|
||||
case RC_OPCODE_ENDLOOP:
|
||||
if (fc_state.BranchDepth != 0
|
||||
|| fc_state.LoopDepth != 1) {
|
||||
lower_endloop(inst, &fc_state);
|
||||
}
|
||||
fc_state.LoopDepth--;
|
||||
/* Skip PRED_RESTORE */
|
||||
inst = inst->Next;
|
||||
break;
|
||||
case RC_OPCODE_IF:
|
||||
lower_if(inst, &fc_state);
|
||||
fc_state.BranchDepth++;
|
||||
break;
|
||||
|
||||
case RC_OPCODE_ELSE:
|
||||
inst->U.I.Opcode = RC_ME_PRED_SET_INV;
|
||||
build_pred_dst(&inst->U.I.DstReg, &fc_state);
|
||||
build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
|
||||
break;
|
||||
|
||||
case RC_OPCODE_ENDIF:
|
||||
if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) {
|
||||
struct rc_instruction * to_delete = inst;
|
||||
inst = inst->Prev;
|
||||
rc_remove_instruction(to_delete);
|
||||
/* XXX: Delete the endif instruction */
|
||||
} else {
|
||||
inst->U.I.Opcode = RC_ME_PRED_SET_POP;
|
||||
build_pred_dst(&inst->U.I.DstReg, &fc_state);
|
||||
build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
|
||||
}
|
||||
fc_state.InCFBreak = 0;
|
||||
fc_state.BranchDepth--;
|
||||
break;
|
||||
|
||||
default:
|
||||
if (fc_state.BranchDepth || fc_state.LoopDepth) {
|
||||
inst->U.I.DstReg.Pred = RC_PRED_SET;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue