r300/compiler: Refactor nqssadce to use rc_program

Signed-off-by: Nicolai Hähnle <nhaehnle@gmail.com>
This commit is contained in:
Nicolai Hähnle 2009-07-23 22:49:31 +02:00
parent a898e7d66c
commit 92f7a599c7
9 changed files with 114 additions and 116 deletions

View file

@ -35,6 +35,7 @@
#include "../r300_reg.h"
#include "radeon_nqssadce.h"
#include "radeon_compiler.h"
#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO))
@ -174,18 +175,15 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst,
}
}
struct prog_instruction *inst;
_mesa_insert_instructions(s->Program, s->IP, 1);
inst = s->Program->Instructions + s->IP++;
inst->Opcode = OPCODE_MOV;
inst->DstReg = dst;
inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W);
inst->SrcReg[0] = src;
inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE;
struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
inst->I.Opcode = OPCODE_MOV;
inst->I.DstReg = dst;
inst->I.DstReg.WriteMask &= (best_matchmask | WRITEMASK_W);
inst->I.SrcReg[0] = src;
inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE;
/* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */
dst.WriteMask &= ~inst->DstReg.WriteMask;
dst.WriteMask &= ~inst->I.DstReg.WriteMask;
}
}

View file

@ -273,30 +273,30 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
fflush(stdout);
}
rc_mesa_to_rc_program(&c->Base, c->program);
if (c->is_r500) {
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadce_init,
.IsNativeSwizzle = &r500FPIsNativeSwizzle,
.BuildSwizzle = &r500FPBuildSwizzle
};
radeonNqssaDce(c->program, &nqssadce, 0);
radeonNqssaDce(&c->Base, &nqssadce, 0);
} else {
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadce_init,
.IsNativeSwizzle = &r300FPIsNativeSwizzle,
.BuildSwizzle = &r300FPBuildSwizzle
};
radeonNqssaDce(c->program, &nqssadce, 0);
radeonNqssaDce(&c->Base, &nqssadce, 0);
}
if (c->Base.Debug) {
_mesa_printf("Compiler: after NqSSA-DCE:\n");
_mesa_print_program(c->program);
rc_print_program(&c->Base.Program);
fflush(stdout);
}
rc_mesa_to_rc_program(&c->Base, c->program);
if (c->is_r500) {
r500BuildFragmentProgramHwCode(c);
} else {

View file

@ -310,35 +310,35 @@ static void ei_pow(struct r300_vertex_program_code *vp,
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp)
static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
{
int i;
int cur_reg;
GLuint OutputsWritten, InputsRead;
OutputsWritten = glvp->OutputsWritten;
InputsRead = glvp->InputsRead;
OutputsWritten = c->Base.Program.OutputsWritten;
InputsRead = c->Base.Program.InputsRead;
cur_reg = -1;
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
if (InputsRead & (1 << i))
vp->inputs[i] = ++cur_reg;
c->code->inputs[i] = ++cur_reg;
else
vp->inputs[i] = -1;
c->code->inputs[i] = -1;
}
cur_reg = 0;
for (i = 0; i < VERT_RESULT_MAX; i++)
vp->outputs[i] = -1;
c->code->outputs[i] = -1;
assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
}
if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
}
/* If we're writing back facing colors we need to send
@ -348,39 +348,39 @@ static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_prog
* get written into appropriate output vectors.
*/
if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
vp->outputs[VERT_RESULT_COL0] = cur_reg++;
c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
} else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
OutputsWritten & (1 << VERT_RESULT_BFC1)) {
cur_reg++;
}
if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
vp->outputs[VERT_RESULT_COL1] = cur_reg++;
c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
} else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
OutputsWritten & (1 << VERT_RESULT_BFC1)) {
cur_reg++;
}
if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
} else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
cur_reg++;
}
if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
} else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
cur_reg++;
}
for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
if (OutputsWritten & (1 << i)) {
vp->outputs[i] = cur_reg++;
c->code->outputs[i] = cur_reg++;
}
}
if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
}
}
@ -391,7 +391,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
compiler->code->pos_end = 0; /* Not supported yet */
compiler->code->length = 0;
t_inputs_outputs(compiler->code, compiler->program);
t_inputs_outputs(compiler);
for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
struct prog_instruction *vpi = &rci->I;
@ -756,7 +756,7 @@ static void nqssadceInit(struct nqssadce_state* s)
}
s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW;
if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ))
if (s->Compiler->Program.OutputsWritten & (1 << VERT_RESULT_PSIZ))
s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X;
}
@ -812,15 +812,15 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
fflush(stdout);
}
rc_mesa_to_rc_program(&compiler->Base, compiler->program);
{
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadceInit,
.IsNativeSwizzle = &swizzleIsNative,
.BuildSwizzle = NULL
};
radeonNqssaDce(compiler->program, &nqssadce, compiler);
rc_mesa_to_rc_program(&compiler->Base, compiler->program);
radeonNqssaDce(&compiler->Base, &nqssadce, compiler);
/* We need this step for reusing temporary registers */
allocate_temporary_registers(compiler);
@ -834,6 +834,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
translate_vertex_program(compiler);
compiler->code->InputsRead = compiler->program->InputsRead;
compiler->code->OutputsWritten = compiler->program->OutputsWritten;
compiler->code->InputsRead = compiler->Base.Program.InputsRead;
compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
}

View file

@ -245,7 +245,6 @@ GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
*/
void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
{
struct prog_instruction *inst;
GLuint negatebase[2] = { 0, 0 };
int i;
@ -256,20 +255,16 @@ void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst,
negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
}
_mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0));
inst = s->Program->Instructions + s->IP;
for(i = 0; i <= 1; ++i) {
if (!negatebase[i])
continue;
inst->Opcode = OPCODE_MOV;
inst->DstReg = dst;
inst->DstReg.WriteMask = negatebase[i];
inst->SrcReg[0] = src;
inst->SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW;
inst++;
s->IP++;
struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
inst->I.Opcode = OPCODE_MOV;
inst->I.DstReg = dst;
inst->I.DstReg.WriteMask = negatebase[i];
inst->I.SrcReg[0] = src;
inst->I.SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW;
}
}

View file

@ -44,6 +44,7 @@ struct rc_program {
struct rc_instruction Instructions;
GLbitfield InputsRead;
GLbitfield OutputsWritten;
GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */
};

View file

@ -36,6 +36,8 @@
#include "radeon_nqssadce.h"
#include "radeon_compiler.h"
/**
* Return the @ref register_state for the given register (or 0 for untracked
@ -76,9 +78,10 @@ struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register s
}
static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
struct prog_instruction *inst, GLint src, GLuint sourced)
static void track_used_srcreg(struct nqssadce_state* s,
GLint src, GLuint sourced)
{
struct prog_instruction * inst = &s->IP->I;
int i;
GLuint deswz_source = 0;
@ -95,12 +98,11 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
struct prog_dst_register dstreg = inst->DstReg;
dstreg.File = PROGRAM_TEMPORARY;
dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
dstreg.Index = rc_find_free_temporary(s->Compiler);
dstreg.WriteMask = sourced;
s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
inst = s->Program->Instructions + s->IP;
inst->SrcReg[src].File = PROGRAM_TEMPORARY;
inst->SrcReg[src].Index = dstreg.Index;
inst->SrcReg[src].Swizzle = 0;
@ -126,30 +128,27 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
if (regstate)
regstate->Sourced |= deswz_source & 0xf;
}
return inst;
}
/**
 * Redirect source operands of a single instruction from one temporary
 * register to another.
 *
 * Every source operand of @p inst (the operand count comes from
 * _mesa_num_inst_src_regs() for the instruction's opcode) whose file is
 * PROGRAM_TEMPORARY and whose index equals @p oldindex gets its index
 * replaced by @p newindex. Swizzle, negate and the destination register
 * are not touched.
 *
 * This listing interleaves the removed version (taking a prog_instruction)
 * with the added version (taking an rc_instruction and going through ->I).
 */
static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
static void unalias_srcregs(struct rc_instruction *inst, GLuint oldindex, GLuint newindex)
{
int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
int nsrc = _mesa_num_inst_src_regs(inst->I.Opcode);
int i;
for(i = 0; i < nsrc; ++i)
if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex)
inst->SrcReg[i].Index = newindex;
if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY && inst->I.SrcReg[i].Index == oldindex)
inst->I.SrcReg[i].Index = newindex;
}
/**
 * Rename temporary register @p oldindex to a newly obtained free temporary
 * in everything up to the current instruction pointer.
 *
 * For each instruction located before s->IP, both a matching
 * PROGRAM_TEMPORARY destination and matching sources are rewritten. For the
 * instruction at s->IP itself only the sources are rewritten (via
 * unalias_srcregs); its destination keeps @p oldindex.
 *
 * This listing interleaves the removed array/index based walk with the
 * added linked-list walk that starts at Program.Instructions.Next.
 */
static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
{
GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
int ip;
for(ip = 0; ip < s->IP; ++ip) {
struct prog_instruction* inst = s->Program->Instructions + ip;
if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex)
inst->DstReg.Index = newindex;
GLuint newindex = rc_find_free_temporary(s->Compiler);
struct rc_instruction * inst;
/* Walk from the first instruction up to, but not including, s->IP. */
for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) {
if (inst->I.DstReg.File == PROGRAM_TEMPORARY && inst->I.DstReg.Index == oldindex)
inst->I.DstReg.Index = newindex;
unalias_srcregs(inst, oldindex, newindex);
}
/* The current instruction: sources only, destination is left as is. */
unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex);
unalias_srcregs(s->IP, oldindex, newindex);
}
@ -158,7 +157,8 @@ static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
*/
static void process_instruction(struct nqssadce_state* s)
{
struct prog_instruction *inst = s->Program->Instructions + s->IP;
struct prog_instruction *inst = &s->IP->I;
GLuint WriteMask;
if (inst->Opcode == OPCODE_END)
return;
@ -166,7 +166,7 @@ static void process_instruction(struct nqssadce_state* s)
if (inst->Opcode != OPCODE_KIL) {
struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
if (!regstate) {
fprintf(stderr, "r300 driver: NqssaDce: bad destination register (%i[%i])\n",
rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n",
inst->DstReg.File, inst->DstReg.Index);
return;
}
@ -175,7 +175,9 @@ static void process_instruction(struct nqssadce_state* s)
regstate->Sourced &= ~inst->DstReg.WriteMask;
if (inst->DstReg.WriteMask == 0) {
_mesa_delete_instructions(s->Program, s->IP, 1);
struct rc_instruction * inst_remove = s->IP;
s->IP = s->IP->Prev;
rc_remove_instruction(inst_remove);
return;
}
@ -183,16 +185,15 @@ static void process_instruction(struct nqssadce_state* s)
unalias_temporary(s, inst->DstReg.Index);
}
/* Attention: Due to swizzle emulation code, the following
* might change the instruction stream under us, so we have
* to be careful with the inst pointer. */
WriteMask = inst->DstReg.WriteMask;
switch (inst->Opcode) {
case OPCODE_ARL:
case OPCODE_DDX:
case OPCODE_DDY:
case OPCODE_FRC:
case OPCODE_MOV:
inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
track_used_srcreg(s, 0, WriteMask);
break;
case OPCODE_ADD:
case OPCODE_MAX:
@ -200,14 +201,14 @@ static void process_instruction(struct nqssadce_state* s)
case OPCODE_MUL:
case OPCODE_SGE:
case OPCODE_SLT:
inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
track_used_srcreg(s, 0, WriteMask);
track_used_srcreg(s, 1, WriteMask);
break;
case OPCODE_CMP:
case OPCODE_MAD:
inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask);
track_used_srcreg(s, 0, WriteMask);
track_used_srcreg(s, 1, WriteMask);
track_used_srcreg(s, 2, WriteMask);
break;
case OPCODE_COS:
case OPCODE_EX2:
@ -215,83 +216,79 @@ static void process_instruction(struct nqssadce_state* s)
case OPCODE_RCP:
case OPCODE_RSQ:
case OPCODE_SIN:
inst = track_used_srcreg(s, inst, 0, 0x1);
track_used_srcreg(s, 0, 0x1);
break;
case OPCODE_DP3:
inst = track_used_srcreg(s, inst, 0, 0x7);
inst = track_used_srcreg(s, inst, 1, 0x7);
track_used_srcreg(s, 0, 0x7);
track_used_srcreg(s, 1, 0x7);
break;
case OPCODE_DP4:
inst = track_used_srcreg(s, inst, 0, 0xf);
inst = track_used_srcreg(s, inst, 1, 0xf);
track_used_srcreg(s, 0, 0xf);
track_used_srcreg(s, 1, 0xf);
break;
case OPCODE_KIL:
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
inst = track_used_srcreg(s, inst, 0, 0xf);
track_used_srcreg(s, 0, 0xf);
break;
case OPCODE_DST:
inst = track_used_srcreg(s, inst, 0, 0x6);
inst = track_used_srcreg(s, inst, 1, 0xa);
track_used_srcreg(s, 0, 0x6);
track_used_srcreg(s, 1, 0xa);
break;
case OPCODE_EXP:
case OPCODE_LOG:
case OPCODE_POW:
inst = track_used_srcreg(s, inst, 0, 0x3);
track_used_srcreg(s, 0, 0x3);
break;
case OPCODE_LIT:
inst = track_used_srcreg(s, inst, 0, 0xb);
track_used_srcreg(s, 0, 0xb);
break;
default:
fprintf(stderr, "r300 driver: NqssaDce: Unknown opcode %d\n", inst->Opcode);
rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
return;
}
s->IP = s->IP->Prev;
}
/**
 * Recompute the InputsRead and OutputsWritten bitmasks from the instruction
 * stream.
 *
 * Both masks are reset to zero and then rebuilt: bit N of InputsRead is set
 * for every source operand in file PROGRAM_INPUT with index N, and bit N of
 * OutputsWritten is set for every destination in file PROGRAM_OUTPUT with
 * index N.
 *
 * Note that although the added version is named calculateInputs, it still
 * recomputes OutputsWritten as well.
 *
 * This listing interleaves the removed version (array walk terminated by
 * OPCODE_END, results stored through local accumulators) with the added
 * version (linked-list walk, results written directly into c->Program).
 */
static void calculateInputsOutputs(struct gl_program *p)
static void calculateInputs(struct radeon_compiler * c)
{
struct prog_instruction *inst;
GLuint InputsRead, OutputsWritten;
struct rc_instruction *inst;
inst = p->Instructions;
InputsRead = 0;
OutputsWritten = 0;
while (inst->Opcode != OPCODE_END)
c->Program.InputsRead = 0;
c->Program.OutputsWritten = 0;
/* Program.Instructions is the list head; stop once we are back at it. */
for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
{
int i, num_src_regs;
int i;
int num_src_regs = _mesa_num_inst_src_regs(inst->I.Opcode);
num_src_regs = _mesa_num_inst_src_regs(inst->Opcode);
for (i = 0; i < num_src_regs; ++i) {
if (inst->SrcReg[i].File == PROGRAM_INPUT)
InputsRead |= 1 << inst->SrcReg[i].Index;
if (inst->I.SrcReg[i].File == PROGRAM_INPUT)
c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index;
}
if (inst->DstReg.File == PROGRAM_OUTPUT)
OutputsWritten |= 1 << inst->DstReg.Index;
++inst;
/* The added version only looks at DstReg for opcodes that have a destination. */
if (_mesa_num_inst_dst_regs(inst->I.Opcode)) {
if (inst->I.DstReg.File == PROGRAM_OUTPUT)
c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index;
}
}
p->InputsRead = InputsRead;
p->OutputsWritten = OutputsWritten;
}
/**
 * Entry point of the NqSSA-DCE pass.
 *
 * Zero-initialises a nqssadce_state, stores the program/compiler, the
 * descriptor @p descr and the opaque @p data (as UserData) in it, and calls
 * descr->Init on the state. The instruction stream is then processed from
 * the last instruction towards the first by repeatedly calling
 * process_instruction(). Finally the InputsRead/OutputsWritten masks are
 * recomputed.
 *
 * This listing interleaves the removed version (gl_program, integer IP)
 * with the added version (radeon_compiler, IP is an rc_instruction pointer).
 *
 * NOTE(review): in the added version the loop only terminates when
 * process_instruction() moves s.IP towards the list head or sets c->Error.
 * Its early return for OPCODE_END appears to do neither, so this would spin
 * if an OPCODE_END instruction were present in the list -- confirm that
 * rc_mesa_to_rc_program() never puts one there.
 */
void radeonNqssaDce(struct gl_program *p, struct radeon_nqssadce_descr* descr, void * data)
void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data)
{
struct nqssadce_state s;
_mesa_bzero(&s, sizeof(s));
s.Program = p;
s.Compiler = c;
s.Descr = descr;
s.UserData = data;
s.Descr->Init(&s);
s.IP = p->NumInstructions;
/* Start at the tail of the list; Program.Instructions itself is the head. */
s.IP = c->Program.Instructions.Prev;
while(s.IP > 0) {
s.IP--;
/* Stop when the walk is back at the list head or an error was reported. */
while(s.IP != &c->Program.Instructions && !c->Error)
process_instruction(&s);
}
calculateInputsOutputs(p);
calculateInputs(c);
}

View file

@ -30,7 +30,6 @@
#include "radeon_program.h"
struct register_state {
/**
* Bitmask indicating which components of the register are sourced
@ -44,13 +43,13 @@ struct register_state {
* read from, etc.
*/
struct nqssadce_state {
struct gl_program *Program;
struct radeon_compiler *Compiler;
struct radeon_nqssadce_descr *Descr;
/**
* All instructions after this instruction pointer have been dealt with.
*/
int IP;
struct rc_instruction * IP;
/**
* Which registers are read by subsequent instructions?
@ -86,7 +85,7 @@ struct radeon_nqssadce_descr {
void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
};
void radeonNqssaDce(struct gl_program *p, struct radeon_nqssadce_descr* descr, void * data);
void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data);
struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg);
#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */

View file

@ -180,6 +180,12 @@ struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, str
return inst;
}
/**
 * Unlink @p inst from the doubly-linked instruction list it belongs to.
 *
 * The two neighbours of @p inst are rewired to point at each other. The
 * Prev/Next fields of @p inst itself are left unchanged and nothing is
 * freed here.
 */
void rc_remove_instruction(struct rc_instruction * inst)
{
	struct rc_instruction * const before = inst->Prev;
	struct rc_instruction * const after = inst->Next;

	before->Next = after;
	after->Prev = before;
}
void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program)
{
@ -192,6 +198,7 @@ void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * progr
c->Program.ShadowSamplers = program->ShadowSamplers;
c->Program.InputsRead = program->InputsRead;
c->Program.OutputsWritten = program->OutputsWritten;
}

View file

@ -133,6 +133,7 @@ GLint rc_find_free_temporary(struct radeon_compiler * c);
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
void rc_remove_instruction(struct rc_instruction * inst);
void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program);
void rc_print_program(const struct rc_program *prog);