r300/compiler: Refactor local transforms to use rc_program

Signed-off-by: Nicolai Hähnle <nhaehnle@gmail.com>
This commit is contained in:
Nicolai Hähnle 2009-07-24 22:34:44 +02:00
parent 800f482586
commit 6f4608f53c
14 changed files with 660 additions and 642 deletions

View file

@ -31,16 +31,12 @@
#include "../r300_reg.h"
static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
{
gl_state_index fail_value_tokens[STATE_LENGTH] = {
STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
};
struct prog_src_register reg = { 0, };
fail_value_tokens[2] = tmu;
reg.File = PROGRAM_STATE_VAR;
reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
reg.Swizzle = SWIZZLE_WWWW;
return reg;
}
@ -50,173 +46,146 @@ static struct prog_src_register shadow_ambient(struct gl_program *program, int t
* - premultiply texture coordinates for RECT
* - extract operand swizzles
* - introduce a temporary register when write masks are needed
*
* \todo If/when r5xx uses the radeon_program architecture, this can probably
* be reused.
*/
GLboolean r300_transform_TEX(
struct radeon_transform_context *t,
struct prog_instruction* orig_inst, void* data)
struct radeon_compiler * c,
struct rc_instruction* inst,
void* data)
{
struct r300_fragment_program_compiler *compiler =
(struct r300_fragment_program_compiler*)data;
struct prog_instruction inst = *orig_inst;
struct prog_instruction* tgt;
GLboolean destredirect = GL_FALSE;
if (inst.Opcode != OPCODE_TEX &&
inst.Opcode != OPCODE_TXB &&
inst.Opcode != OPCODE_TXP &&
inst.Opcode != OPCODE_KIL)
if (inst->I.Opcode != OPCODE_TEX &&
inst->I.Opcode != OPCODE_TXB &&
inst->I.Opcode != OPCODE_TXP &&
inst->I.Opcode != OPCODE_KIL)
return GL_FALSE;
if (inst.Opcode != OPCODE_KIL &&
t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
/* ARB_shadow & EXT_shadow_funcs */
if (inst->I.Opcode != OPCODE_KIL &&
c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
tgt = radeonAppendInstructions(t->Program, 1);
inst->I.Opcode = OPCODE_MOV;
tgt->Opcode = OPCODE_MOV;
tgt->DstReg = inst.DstReg;
if (comparefunc == GL_ALWAYS) {
tgt->SrcReg[0].File = PROGRAM_BUILTIN;
tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
} else {
tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
}
return GL_TRUE;
} else {
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
int pass, fail;
inst_rcp->I.Opcode = OPCODE_RCP;
inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
inst_cmp->I.DstReg = inst->I.DstReg;
inst->I.DstReg.File = PROGRAM_TEMPORARY;
inst->I.DstReg.Index = rc_find_free_temporary(c);
inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
inst_mad->I.Opcode = OPCODE_MAD;
inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
if (depthmode == 0) /* GL_LUMINANCE */
inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
else if (depthmode == 2) /* GL_ALPHA */
inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;
/* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
* r < tex <=> -tex+r < 0
* r >= tex <=> not (-tex+r < 0) */
if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
else
inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;
inst_cmp->I.Opcode = OPCODE_CMP;
/* DstReg has been filled out above */
inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;
if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
pass = 1;
fail = 2;
} else {
pass = 2;
fail = 1;
}
inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
}
inst.DstReg.File = PROGRAM_TEMPORARY;
inst.DstReg.Index = radeonFindFreeTemporary(t);
inst.DstReg.WriteMask = WRITEMASK_XYZW;
}
/* Hardware uses [0..1]x[0..1] range for rectangle textures
* instead of [0..Width]x[0..Height].
* Add a scaling instruction.
*/
if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) {
gl_state_index tokens[STATE_LENGTH] = {
STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
0
};
if (inst->I.Opcode != OPCODE_KIL && inst->I.TexSrcTarget == TEXTURE_RECT_INDEX) {
struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev);
int tempreg = radeonFindFreeTemporary(t);
int factor_index;
inst_mul->I.Opcode = OPCODE_MUL;
inst_mul->I.DstReg.File = PROGRAM_TEMPORARY;
inst_mul->I.DstReg.Index = rc_find_free_temporary(c);
inst_mul->I.SrcReg[0] = inst->I.SrcReg[0];
inst_mul->I.SrcReg[1].File = PROGRAM_STATE_VAR;
inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit);
tokens[2] = inst.TexSrcUnit;
factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens);
tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MUL;
tgt->DstReg.File = PROGRAM_TEMPORARY;
tgt->DstReg.Index = tempreg;
tgt->SrcReg[0] = inst.SrcReg[0];
tgt->SrcReg[1].File = PROGRAM_STATE_VAR;
tgt->SrcReg[1].Index = factor_index;
reset_srcreg(&inst.SrcReg[0]);
inst.SrcReg[0].File = PROGRAM_TEMPORARY;
inst.SrcReg[0].Index = tempreg;
reset_srcreg(&inst->I.SrcReg[0]);
inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index;
}
if (inst.Opcode != OPCODE_KIL) {
if (inst.DstReg.File != PROGRAM_TEMPORARY ||
inst.DstReg.WriteMask != WRITEMASK_XYZW) {
int tempreg = radeonFindFreeTemporary(t);
/* Cannot write texture to output registers or with masks */
if (inst->I.Opcode != OPCODE_KIL &&
(inst->I.DstReg.File != PROGRAM_TEMPORARY || inst->I.DstReg.WriteMask != WRITEMASK_XYZW)) {
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
inst.DstReg.File = PROGRAM_TEMPORARY;
inst.DstReg.Index = tempreg;
inst.DstReg.WriteMask = WRITEMASK_XYZW;
destredirect = GL_TRUE;
} else if (inst.SaturateMode) {
destredirect = GL_TRUE;
}
inst_mov->I.Opcode = OPCODE_MOV;
inst_mov->I.DstReg = inst->I.DstReg;
inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);
inst->I.DstReg.File = PROGRAM_TEMPORARY;
inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
}
if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
int tmpreg = radeonFindFreeTemporary(t);
tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg.File = PROGRAM_TEMPORARY;
tgt->DstReg.Index = tmpreg;
tgt->SrcReg[0] = inst.SrcReg[0];
reset_srcreg(&inst.SrcReg[0]);
inst.SrcReg[0].File = PROGRAM_TEMPORARY;
inst.SrcReg[0].Index = tmpreg;
}
/* Cannot read texture coordinate from constants file */
if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
tgt = radeonAppendInstructions(t->Program, 1);
_mesa_copy_instructions(tgt, &inst, 1);
inst_mov->I.Opcode = OPCODE_MOV;
inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
if (inst.Opcode != OPCODE_KIL &&
t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
GLuint depthmode = compiler->state.unit[inst.TexSrcUnit].depth_texture_mode;
int rcptemp = radeonFindFreeTemporary(t);
int pass, fail;
tgt = radeonAppendInstructions(t->Program, 3);
tgt[0].Opcode = OPCODE_RCP;
tgt[0].DstReg.File = PROGRAM_TEMPORARY;
tgt[0].DstReg.Index = rcptemp;
tgt[0].DstReg.WriteMask = WRITEMASK_W;
tgt[0].SrcReg[0] = inst.SrcReg[0];
tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
tgt[1].Opcode = OPCODE_MAD;
tgt[1].DstReg = inst.DstReg;
tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
tgt[1].SrcReg[0] = inst.SrcReg[0];
tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
tgt[1].SrcReg[1].Index = rcptemp;
tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
tgt[1].SrcReg[2].Index = inst.DstReg.Index;
if (depthmode == 0) /* GL_LUMINANCE */
tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
else if (depthmode == 2) /* GL_ALPHA */
tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
/* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
* r < tex <=> -tex+r < 0
* r >= tex <=> not (-tex+r < 0 */
if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
else
tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
tgt[2].Opcode = OPCODE_CMP;
tgt[2].DstReg = orig_inst->DstReg;
tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
pass = 1;
fail = 2;
} else {
pass = 2;
fail = 1;
}
tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
} else if (destredirect) {
tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg = orig_inst->DstReg;
tgt->SaturateMode = inst.SaturateMode;
tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
tgt->SrcReg[0].Index = inst.DstReg.Index;
reset_srcreg(&inst->I.SrcReg[0]);
inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
}
return GL_TRUE;

View file

@ -44,6 +44,6 @@ extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler
extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c);
extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data);
extern GLboolean r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data);
#endif

View file

@ -250,6 +250,8 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
rewrite_depth_out(c->program);
rc_mesa_to_rc_program(&c->Base, c->program);
if (c->is_r500) {
struct radeon_program_transformation transformations[] = {
{ &r500_transform_TEX, c },
@ -257,24 +259,22 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{ &radeonTransformDeriv, 0 },
{ &radeonTransformTrigScale, 0 }
};
radeonLocalTransform(c->program, 4, transformations);
radeonLocalTransform(&c->Base, 4, transformations);
} else {
struct radeon_program_transformation transformations[] = {
{ &r300_transform_TEX, c },
{ &radeonTransformALU, 0 },
{ &radeonTransformTrigSimple, 0 }
};
radeonLocalTransform(c->program, 3, transformations);
radeonLocalTransform(&c->Base, 3, transformations);
}
if (c->Base.Debug) {
_mesa_printf("Fragment Program: After native rewrite:\n");
_mesa_print_program(c->program);
rc_print_program(&c->Base.Program);
fflush(stdout);
}
rc_mesa_to_rc_program(&c->Base, c->program);
if (c->is_r500) {
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadce_init,

View file

@ -538,47 +538,43 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
* Introduce intermediate MOVs to temporary registers to account for this.
*/
static GLboolean transform_source_conflicts(
struct radeon_transform_context *t,
struct prog_instruction* orig_inst,
struct radeon_compiler *c,
struct rc_instruction* inst,
void* unused)
{
struct prog_instruction inst = *orig_inst;
struct prog_instruction * dst;
GLuint num_operands = _mesa_num_inst_src_regs(inst.Opcode);
GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode);
if (num_operands == 3) {
if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[2])
|| t_src_conflict(inst.SrcReg[0], inst.SrcReg[2])) {
int tmpreg = radeonFindFreeTemporary(t);
struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1);
inst_mov->Opcode = OPCODE_MOV;
inst_mov->DstReg.File = PROGRAM_TEMPORARY;
inst_mov->DstReg.Index = tmpreg;
inst_mov->SrcReg[0] = inst.SrcReg[2];
if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2])
|| t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) {
int tmpreg = rc_find_free_temporary(c);
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
inst_mov->I.Opcode = OPCODE_MOV;
inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
inst_mov->I.DstReg.Index = tmpreg;
inst_mov->I.SrcReg[0] = inst->I.SrcReg[2];
reset_srcreg(&inst.SrcReg[2]);
inst.SrcReg[2].File = PROGRAM_TEMPORARY;
inst.SrcReg[2].Index = tmpreg;
reset_srcreg(&inst->I.SrcReg[2]);
inst->I.SrcReg[2].File = PROGRAM_TEMPORARY;
inst->I.SrcReg[2].Index = tmpreg;
}
}
if (num_operands >= 2) {
if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[0])) {
int tmpreg = radeonFindFreeTemporary(t);
struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1);
inst_mov->Opcode = OPCODE_MOV;
inst_mov->DstReg.File = PROGRAM_TEMPORARY;
inst_mov->DstReg.Index = tmpreg;
inst_mov->SrcReg[0] = inst.SrcReg[1];
if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) {
int tmpreg = rc_find_free_temporary(c);
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
inst_mov->I.Opcode = OPCODE_MOV;
inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
inst_mov->I.DstReg.Index = tmpreg;
inst_mov->I.SrcReg[0] = inst->I.SrcReg[1];
reset_srcreg(&inst.SrcReg[1]);
inst.SrcReg[1].File = PROGRAM_TEMPORARY;
inst.SrcReg[1].Index = tmpreg;
reset_srcreg(&inst->I.SrcReg[1]);
inst->I.SrcReg[1].File = PROGRAM_TEMPORARY;
inst->I.SrcReg[1].Index = tmpreg;
}
}
dst = radeonAppendInstructions(t->Program, 1);
*dst = inst;
return GL_TRUE;
}
@ -782,16 +778,18 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
addArtificialOutputs(compiler);
rc_mesa_to_rc_program(&compiler->Base, compiler->program);
{
struct radeon_program_transformation transformations[] = {
{ &r300_transform_vertex_alu, 0 },
};
radeonLocalTransform(compiler->program, 1, transformations);
radeonLocalTransform(&compiler->Base, 1, transformations);
}
if (compiler->Base.Debug) {
fprintf(stderr, "Vertex program after native rewrite:\n");
_mesa_print_program(compiler->program);
rc_print_program(&compiler->Base.Program);
fflush(stdout);
}
@ -803,17 +801,15 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
struct radeon_program_transformation transformations[] = {
{ &transform_source_conflicts, 0 },
};
radeonLocalTransform(compiler->program, 1, transformations);
radeonLocalTransform(&compiler->Base, 1, transformations);
}
if (compiler->Base.Debug) {
fprintf(stderr, "Vertex program after source conflict resolve:\n");
_mesa_print_program(compiler->program);
rc_print_program(&compiler->Base.Program);
fflush(stdout);
}
rc_mesa_to_rc_program(&compiler->Base, compiler->program);
{
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadceInit,

View file

@ -29,152 +29,139 @@
#include "../r300_reg.h"
static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
{
gl_state_index fail_value_tokens[STATE_LENGTH] = {
STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
};
struct prog_src_register reg = { 0, };
fail_value_tokens[2] = tmu;
reg.File = PROGRAM_STATE_VAR;
reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
reg.Swizzle = SWIZZLE_WWWW;
return reg;
}
/**
* Transform TEX, TXP, TXB, and KIL instructions in the following way:
* - premultiply texture coordinates for RECT
* - extract operand swizzles
* - introduce a temporary register when write masks are needed
*
* - implement texture compare (shadow extensions)
* - extract non-native source / destination operands
*/
GLboolean r500_transform_TEX(
struct radeon_transform_context *t,
struct prog_instruction* orig_inst, void* data)
struct radeon_compiler * c,
struct rc_instruction * inst,
void* data)
{
struct r300_fragment_program_compiler *compiler =
(struct r300_fragment_program_compiler*)data;
struct prog_instruction inst = *orig_inst;
struct prog_instruction* tgt;
GLboolean destredirect = GL_FALSE;
if (inst.Opcode != OPCODE_TEX &&
inst.Opcode != OPCODE_TXB &&
inst.Opcode != OPCODE_TXP &&
inst.Opcode != OPCODE_KIL)
if (inst->I.Opcode != OPCODE_TEX &&
inst->I.Opcode != OPCODE_TXB &&
inst->I.Opcode != OPCODE_TXP &&
inst->I.Opcode != OPCODE_KIL)
return GL_FALSE;
/* ARB_shadow & EXT_shadow_funcs */
if (inst.Opcode != OPCODE_KIL &&
t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
if (inst->I.Opcode != OPCODE_KIL &&
c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
tgt = radeonAppendInstructions(t->Program, 1);
inst->I.Opcode = OPCODE_MOV;
tgt->Opcode = OPCODE_MOV;
tgt->DstReg = inst.DstReg;
if (comparefunc == GL_ALWAYS) {
tgt->SrcReg[0].File = PROGRAM_BUILTIN;
tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
} else {
tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
}
return GL_TRUE;
}
inst.DstReg.File = PROGRAM_TEMPORARY;
inst.DstReg.Index = radeonFindFreeTemporary(t);
inst.DstReg.WriteMask = WRITEMASK_XYZW;
} else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) {
int tempreg = radeonFindFreeTemporary(t);
inst.DstReg.File = PROGRAM_TEMPORARY;
inst.DstReg.Index = tempreg;
inst.DstReg.WriteMask = WRITEMASK_XYZW;
destredirect = GL_TRUE;
}
if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
int tmpreg = radeonFindFreeTemporary(t);
tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg.File = PROGRAM_TEMPORARY;
tgt->DstReg.Index = tmpreg;
tgt->SrcReg[0] = inst.SrcReg[0];
reset_srcreg(&inst.SrcReg[0]);
inst.SrcReg[0].File = PROGRAM_TEMPORARY;
inst.SrcReg[0].Index = tmpreg;
}
tgt = radeonAppendInstructions(t->Program, 1);
_mesa_copy_instructions(tgt, &inst, 1);
if (inst.Opcode != OPCODE_KIL &&
t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
GLuint depthmode = compiler->state.unit[inst.TexSrcUnit].depth_texture_mode;
int rcptemp = radeonFindFreeTemporary(t);
int pass, fail;
tgt = radeonAppendInstructions(t->Program, 3);
tgt[0].Opcode = OPCODE_RCP;
tgt[0].DstReg.File = PROGRAM_TEMPORARY;
tgt[0].DstReg.Index = rcptemp;
tgt[0].DstReg.WriteMask = WRITEMASK_W;
tgt[0].SrcReg[0] = inst.SrcReg[0];
tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
tgt[1].Opcode = OPCODE_MAD;
tgt[1].DstReg = inst.DstReg;
tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
tgt[1].SrcReg[0] = inst.SrcReg[0];
tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
tgt[1].SrcReg[1].Index = rcptemp;
tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
tgt[1].SrcReg[2].Index = inst.DstReg.Index;
if (depthmode == 0) /* GL_LUMINANCE */
tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
else if (depthmode == 2) /* GL_ALPHA */
tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
/* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
* r < tex <=> -tex+r < 0
* r >= tex <=> not (-tex+r < 0 */
if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
else
tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
tgt[2].Opcode = OPCODE_CMP;
tgt[2].DstReg = orig_inst->DstReg;
tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
pass = 1;
fail = 2;
} else {
pass = 2;
fail = 1;
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
int pass, fail;
inst_rcp->I.Opcode = OPCODE_RCP;
inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
inst_cmp->I.DstReg = inst->I.DstReg;
inst->I.DstReg.File = PROGRAM_TEMPORARY;
inst->I.DstReg.Index = rc_find_free_temporary(c);
inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
inst_mad->I.Opcode = OPCODE_MAD;
inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
if (depthmode == 0) /* GL_LUMINANCE */
inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
else if (depthmode == 2) /* GL_ALPHA */
inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;
/* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
* r < tex <=> -tex+r < 0
* r >= tex <=> not (-tex+r < 0) */
if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
else
inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;
inst_cmp->I.Opcode = OPCODE_CMP;
/* DstReg has been filled out above */
inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;
if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
pass = 1;
fail = 2;
} else {
pass = 2;
fail = 1;
}
inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
}
}
tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
} else if (destredirect) {
tgt = radeonAppendInstructions(t->Program, 1);
/* Cannot write texture to output registers */
if (inst->I.Opcode != OPCODE_KIL && inst->I.DstReg.File != PROGRAM_TEMPORARY) {
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg = orig_inst->DstReg;
tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
tgt->SrcReg[0].Index = inst.DstReg.Index;
inst_mov->I.Opcode = OPCODE_MOV;
inst_mov->I.DstReg = inst->I.DstReg;
inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);
inst->I.DstReg.File = PROGRAM_TEMPORARY;
inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
}
/* Cannot read texture coordinate from constants file */
if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
inst_mov->I.Opcode = OPCODE_MOV;
inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
reset_srcreg(&inst->I.SrcReg[0]);
inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
}
return GL_TRUE;

View file

@ -47,6 +47,9 @@ extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register r
extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src);
extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data);
extern GLboolean r500_transform_TEX(
struct radeon_compiler * c,
struct rc_instruction * inst,
void* data);
#endif

View file

@ -76,3 +76,95 @@ unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * cons
return index;
}
/**
* Add a state vector to the constant list, while trying to avoid duplicates.
*/
unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1)
{
unsigned index;
struct rc_constant constant;
for(index = 0; index < c->Count; ++index) {
if (c->Constants[index].Type == RC_CONSTANT_STATE) {
if (c->Constants[index].u.State[0] == state0 &&
c->Constants[index].u.State[1] == state1)
return index;
}
}
memset(&constant, 0, sizeof(constant));
constant.Type = RC_CONSTANT_STATE;
constant.Size = 4;
constant.u.State[0] = state0;
constant.u.State[1] = state1;
return rc_constants_add(c, &constant);
}
/**
 * Add an immediate vector to the constant list, while trying to avoid
 * duplicates.
 *
 * Only immediates that already occupy all four components (Size == 4) are
 * candidates for reuse. Partially filled immediates are still open for
 * packing by rc_constants_add_immediate_scalar(), which may later write
 * into their unused components; sharing such a slot would let the vector
 * handed out here change behind the caller's back.
 *
 * \param c     constant list to search and, if necessary, extend
 * \param data  pointer to the four floats of the immediate
 * \return index of the matching or newly added constant
 */
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data)
{
	unsigned index;
	struct rc_constant constant;

	for(index = 0; index < c->Count; ++index) {
		if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE &&
		    c->Constants[index].Size == 4) {
			/* Bitwise comparison: distinguishes -0.0 from +0.0. */
			if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4))
				return index;
		}
	}

	memset(&constant, 0, sizeof(constant));
	constant.Type = RC_CONSTANT_IMMEDIATE;
	constant.Size = 4;
	memcpy(constant.u.Immediate, data, sizeof(float) * 4);

	return rc_constants_add(c, &constant);
}
/**
* Add an immediate scalar to the constant list, while trying to avoid
* duplicates.
*/
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle)
{
unsigned index;
int free_index = -1;
struct rc_constant constant;
for(index = 0; index < c->Count; ++index) {
if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
for(unsigned comp = 0; comp < c->Constants[index].Size; ++comp) {
if (c->Constants[index].u.Immediate[comp] == data) {
*swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
return index;
}
}
if (c->Constants[index].Size < 4)
free_index = index;
}
}
if (free_index >= 0) {
unsigned comp = c->Constants[free_index].Size++;
c->Constants[free_index].u.Immediate[comp] = data;
*swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
return free_index;
}
memset(&constant, 0, sizeof(constant));
constant.Type = RC_CONSTANT_IMMEDIATE;
constant.Size = 1;
constant.u.Immediate[0] = data;
*swizzle = SWIZZLE_XXXX;
return rc_constants_add(c, &constant);
}

View file

@ -35,8 +35,6 @@
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
enum {
/**
@ -50,17 +48,26 @@ enum {
/**
* Constant referring to state that is known by this compiler,
* i.e. *not* arbitrary Mesa (or other) state.
* see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
*/
RC_CONSTANT_STATE
};
/**
 * Identifiers for state vectors known to the compiler (RC_STATE_xxx).
 * A value from this list is passed as the first state word to
 * rc_constants_add_state().
 */
enum {
/* Requested by shadow_ambient() with the texture unit as second state word. */
RC_STATE_SHADOW_AMBIENT = 0,
RC_STATE_R300_WINDOW_DIMENSION,
/* Requested for RECT texture coordinate scaling, with the texture unit as second state word. */
RC_STATE_R300_TEXRECT_FACTOR
};
struct rc_constant {
unsigned Type:2; /**< RC_CONSTANT_xxx */
unsigned Size:3;
union {
unsigned External;
float Immediate[4];
unsigned State[4];
unsigned State[2];
} u;
};
@ -75,6 +82,9 @@ void rc_constants_init(struct rc_constant_list * c);
void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
void rc_constants_destroy(struct rc_constant_list * c);
unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
/* state0 and state1 are the two state words stored in u.State[0] and u.State[1]. */
unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1);
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
/**
* Stores state that influences the compilation of a fragment program.

View file

@ -67,7 +67,7 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...);
struct r300_fragment_program_compiler {
struct radeon_compiler Base;
struct rX00_fragment_program_code *code;
struct gl_program *program;
struct gl_program * program;
struct r300_fragment_program_external_state state;
GLboolean is_r500;
};

View file

@ -48,83 +48,25 @@
* one instruction at a time.
*/
void radeonLocalTransform(
struct gl_program *program,
struct radeon_compiler * c,
int num_transformations,
struct radeon_program_transformation* transformations)
{
struct radeon_transform_context ctx;
int ip;
struct rc_instruction * inst = c->Program.Instructions.Next;
ctx.Program = program;
ctx.OldInstructions = program->Instructions;
ctx.OldNumInstructions = program->NumInstructions;
program->Instructions = 0;
program->NumInstructions = 0;
for(ip = 0; ip < ctx.OldNumInstructions; ++ip) {
struct prog_instruction *instr = ctx.OldInstructions + ip;
while(inst != &c->Program.Instructions) {
struct rc_instruction * current = inst;
int i;
inst = inst->Next;
for(i = 0; i < num_transformations; ++i) {
struct radeon_program_transformation* t = transformations + i;
if (t->function(&ctx, instr, t->userData))
if (t->function(c, current, t->userData))
break;
}
if (i >= num_transformations) {
struct prog_instruction* dest = radeonAppendInstructions(program, 1);
_mesa_copy_instructions(dest, instr, 1);
}
}
_mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions);
}
static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count)
{
GLuint i;
for (i = 0; i < count; i++) {
const struct prog_instruction *inst = insts + i;
const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);
GLuint k;
for (k = 0; k < n; k++) {
if (inst->SrcReg[k].File == PROGRAM_TEMPORARY)
used[inst->SrcReg[k].Index] = GL_TRUE;
}
}
}
GLint radeonFindFreeTemporary(struct radeon_transform_context *t)
{
GLboolean used[MAX_PROGRAM_TEMPS];
GLuint i;
_mesa_memset(used, 0, sizeof(used));
scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions);
scan_instructions(used, t->OldInstructions, t->OldNumInstructions);
for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
if (!used[i])
return i;
}
return -1;
}
/**
* Append the given number of instructions to the program and return a
* pointer to the first new instruction.
*/
struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count)
{
int oldnum = program->NumInstructions;
_mesa_insert_instructions(program, oldnum, count);
return program->Instructions + oldnum;
}
@ -206,6 +148,7 @@ void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * progr
struct rc_constant constant;
constant.Type = RC_CONSTANT_EXTERNAL;
constant.Size = 4;
constant.u.External = i;
rc_constants_add(&c->Program.Constants, &constant);

View file

@ -87,18 +87,6 @@ static INLINE void reset_srcreg(struct prog_src_register* reg)
}
/**
* Transformation context that is passed to local transformations.
*
* Care must be taken with some operations during transformation,
* e.g. finding new temporary registers must use @ref radeonFindFreeTemporary
*
* radeonLocalTransform fills this in before visiting the instructions:
* it saves the program's instruction array in the Old* fields and resets
* the program's own array to empty.
*/
struct radeon_transform_context {
/* Program that transformations append their output to. */
struct gl_program *Program;
/* Instruction array the program held when the pass started. */
struct prog_instruction *OldInstructions;
/* Number of entries in OldInstructions. */
GLuint OldNumInstructions;
};
/**
* A transformation that can be passed to \ref radeonLocalTransform.
*
@ -111,24 +99,17 @@ struct radeon_transform_context {
*/
struct radeon_program_transformation {
/*
* Callback invoked once per instruction. A true (non-zero) return makes
* radeonLocalTransform stop trying further transformations for that
* instruction. The last argument receives userData.
*/
GLboolean (*function)(
struct radeon_transform_context*,
struct prog_instruction*,
struct radeon_compiler*,
struct rc_instruction*,
void*);
/* Opaque pointer handed back to function on every call. */
void *userData;
};
void radeonLocalTransform(
struct gl_program *program,
struct radeon_compiler *c,
int num_transformations,
struct radeon_program_transformation* transformations);
/**
* Find a usable free temporary register during program transformation
*/
GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx);
struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count);
GLint rc_find_free_temporary(struct radeon_compiler * c);
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);

View file

@ -35,49 +35,52 @@
#include "radeon_program_alu.h"
#include "shader/prog_parameter.h"
#include "radeon_compiler.h"
/*
* Create a one-source instruction and fill in its opcode, saturate mode,
* destination and source register.
*
* The rc_instruction form obtains the new instruction from
* rc_insert_new_instruction(c, after); the prog_instruction form obtains
* it from radeonAppendInstructions(p, 1).
*
* Returns the newly created instruction.
*/
static struct prog_instruction *emit1(struct gl_program* p,
static struct rc_instruction *emit1(
struct radeon_compiler * c, struct rc_instruction * after,
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
struct prog_src_register SrcReg)
{
struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
fpi->Opcode = Opcode;
fpi->SaturateMode = Saturate;
fpi->DstReg = DstReg;
fpi->SrcReg[0] = SrcReg;
fpi->I.Opcode = Opcode;
fpi->I.SaturateMode = Saturate;
fpi->I.DstReg = DstReg;
fpi->I.SrcReg[0] = SrcReg;
return fpi;
}
/*
* Create a two-source instruction and fill in its opcode, saturate mode,
* destination and both source registers.
*
* The rc_instruction form obtains the new instruction from
* rc_insert_new_instruction(c, after); the prog_instruction form obtains
* it from radeonAppendInstructions(p, 1).
*
* Returns the newly created instruction.
*/
static struct prog_instruction *emit2(struct gl_program* p,
static struct rc_instruction *emit2(
struct radeon_compiler * c, struct rc_instruction * after,
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
struct prog_src_register SrcReg0, struct prog_src_register SrcReg1)
{
struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
fpi->Opcode = Opcode;
fpi->SaturateMode = Saturate;
fpi->DstReg = DstReg;
fpi->SrcReg[0] = SrcReg0;
fpi->SrcReg[1] = SrcReg1;
fpi->I.Opcode = Opcode;
fpi->I.SaturateMode = Saturate;
fpi->I.DstReg = DstReg;
fpi->I.SrcReg[0] = SrcReg0;
fpi->I.SrcReg[1] = SrcReg1;
return fpi;
}
/*
* Create a three-source instruction and fill in its opcode, saturate mode,
* destination and all three source registers.
*
* The rc_instruction form obtains the new instruction from
* rc_insert_new_instruction(c, after); the prog_instruction form obtains
* it from radeonAppendInstructions(p, 1).
*
* Returns the newly created instruction.
*/
static struct prog_instruction *emit3(struct gl_program* p,
static struct rc_instruction *emit3(
struct radeon_compiler * c, struct rc_instruction * after,
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
struct prog_src_register SrcReg0, struct prog_src_register SrcReg1,
struct prog_src_register SrcReg2)
{
struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
fpi->Opcode = Opcode;
fpi->SaturateMode = Saturate;
fpi->DstReg = DstReg;
fpi->SrcReg[0] = SrcReg0;
fpi->SrcReg[1] = SrcReg1;
fpi->SrcReg[2] = SrcReg2;
fpi->I.Opcode = Opcode;
fpi->I.SaturateMode = Saturate;
fpi->I.DstReg = DstReg;
fpi->I.SrcReg[0] = SrcReg0;
fpi->I.SrcReg[1] = SrcReg1;
fpi->I.SrcReg[2] = SrcReg2;
return fpi;
}
@ -171,58 +174,63 @@ static struct prog_src_register scalar(struct prog_src_register reg)
return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
}
/*
* Replace ABS by a MOV whose source has the Abs flag set and negation
* cleared. Saturate mode and destination are carried over. In the
* rc_instruction form the MOV is inserted after inst->Prev and the original
* instruction is removed.
*/
static void transform_ABS(struct radeon_transform_context* t,
struct prog_instruction* inst)
static void transform_ABS(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct prog_src_register src = inst->SrcReg[0];
struct prog_src_register src = inst->I.SrcReg[0];
src.Abs = 1;
src.Negate = NEGATE_NONE;
emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src);
emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src);
rc_remove_instruction(inst);
}
/*
* Replace DP3 by DP4. For both sources the fourth 3-bit swizzle field is
* overwritten with SWIZZLE_ZERO and the W negate bit is cleared, so the
* fourth component contributes nothing to the dot product.
*/
static void transform_DP3(struct radeon_transform_context* t,
struct prog_instruction* inst)
static void transform_DP3(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct prog_src_register src0 = inst->SrcReg[0];
struct prog_src_register src1 = inst->SrcReg[1];
struct prog_src_register src0 = inst->I.SrcReg[0];
struct prog_src_register src1 = inst->I.SrcReg[1];
src0.Negate &= ~NEGATE_W;
/* clear the fourth swizzle field (3 bits per component), then set it */
src0.Swizzle &= ~(7 << (3 * 3));
src0.Swizzle |= SWIZZLE_ZERO << (3 * 3);
src1.Negate &= ~NEGATE_W;
src1.Swizzle &= ~(7 << (3 * 3));
src1.Swizzle |= SWIZZLE_ZERO << (3 * 3);
emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, src1);
emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1);
rc_remove_instruction(inst);
}
/*
* Replace DPH by DP4. The fourth swizzle field of the first source is
* overwritten with SWIZZLE_ONE and its W negate bit is cleared; the second
* source is passed through unchanged.
*/
static void transform_DPH(struct radeon_transform_context* t,
struct prog_instruction* inst)
static void transform_DPH(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct prog_src_register src0 = inst->SrcReg[0];
struct prog_src_register src0 = inst->I.SrcReg[0];
src0.Negate &= ~NEGATE_W;
/* clear the fourth swizzle field (3 bits per component), then set it */
src0.Swizzle &= ~(7 << (3 * 3));
src0.Swizzle |= SWIZZLE_ONE << (3 * 3);
emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]);
emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]);
rc_remove_instruction(inst);
}
/**
* [1, src0.y*src1.y, src0.z, src1.w]
* So basically MUL with lotsa swizzling.
*
* The first operand is swizzled to (1, y, z, 1) and the second to
* (1, y, 1, w), so a single component-wise MUL yields the vector above.
*/
static void transform_DST(struct radeon_transform_context* t,
struct prog_instruction* inst)
static void transform_DST(struct radeon_compiler* c,
struct rc_instruction* inst)
{
emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg,
swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
emit2(c, inst->Prev, OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg,
swizzle(inst->I.SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
swizzle(inst->I.SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
rc_remove_instruction(inst);
}
/*
* Replace FLR by two instructions using a free temporary:
*   FRC tmp, src
*   ADD dst, src, -tmp
* The saturate mode of the original instruction is applied to the ADD only.
*/
static void transform_FLR(struct radeon_transform_context* t,
struct prog_instruction* inst)
static void transform_FLR(struct radeon_compiler* c,
struct rc_instruction* inst)
{
int tempreg = radeonFindFreeTemporary(t);
emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]);
emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg,
inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
int tempreg = rc_find_free_temporary(c);
emit1(c, inst->Prev, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0]);
emit2(c, inst->Prev, OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg,
inst->I.SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
rc_remove_instruction(inst);
}
/**
@ -243,152 +251,159 @@ static void transform_FLR(struct radeon_transform_context* t,
* 5 slots, if the subsequent optimization passes are clever enough
* to pair instructions correctly.
*/
static void transform_LIT(struct radeon_transform_context* t,
struct prog_instruction* inst)
static void transform_LIT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
/*
* Expands LIT into a MAX/MIN/LG2/MUL/EX2/CMP/MOV sequence that works on a
* full-mask temporary. When the destination is not such a temporary, the
* sequence targets a fresh temporary and a final MOV copies it into the
* real destination.
*/
static const GLfloat LitConst[4] = { -127.999999 };
GLuint constant;
GLuint constant_swizzle;
GLuint temp;
int needTemporary = 0;
struct prog_src_register srctemp;
constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle);
constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);
if (inst->DstReg.WriteMask != WRITEMASK_XYZW) {
needTemporary = 1;
} else if (inst->DstReg.File != PROGRAM_TEMPORARY) {
// LIT is typically followed by DP3/DP4, so there's no point
// in creating special code for this case
needTemporary = 1;
if (inst->I.DstReg.WriteMask != WRITEMASK_XYZW || inst->I.DstReg.File != PROGRAM_TEMPORARY) {
struct rc_instruction * inst_mov;
// The MOV is inserted after inst, so it stays behind the expansion
// below (which is inserted before inst) once inst is removed.
inst_mov = emit1(c, inst,
OPCODE_MOV, 0, inst->I.DstReg,
srcreg(PROGRAM_TEMPORARY, rc_find_free_temporary(c)));
// Redirect the LIT itself to the temporary the MOV reads from.
inst->I.DstReg.File = PROGRAM_TEMPORARY;
inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
}
if (needTemporary) {
temp = radeonFindFreeTemporary(t);
} else {
temp = inst->DstReg.Index;
}
temp = inst->I.DstReg.Index;
srctemp = srcreg(PROGRAM_TEMPORARY, temp);
// tmp.x = max(0.0, Src.x);
// tmp.y = max(0.0, Src.y);
// tmp.z = clamp(Src.w, -128+eps, 128-eps);
//   (the MAX applies the lower bound into tmp.w,
//    the MIN applies the upper bound and writes tmp.z)
emit2(t->Program, OPCODE_MAX, 0,
emit2(c, inst->Prev, OPCODE_MAX, 0,
dstregtmpmask(temp, WRITEMASK_XYW),
inst->SrcReg[0],
inst->I.SrcReg[0],
swizzle(srcreg(PROGRAM_CONSTANT, constant),
SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3));
emit2(t->Program, OPCODE_MIN, 0,
emit2(c, inst->Prev, OPCODE_MIN, 0,
dstregtmpmask(temp, WRITEMASK_Z),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)));
// tmp.w = Pow(tmp.y, tmp.z), computed as EX2(LG2(tmp.y) * tmp.z)
emit1(t->Program, OPCODE_LG2, 0,
emit1(c, inst->Prev, OPCODE_LG2, 0,
dstregtmpmask(temp, WRITEMASK_W),
swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
emit2(t->Program, OPCODE_MUL, 0,
emit2(c, inst->Prev, OPCODE_MUL, 0,
dstregtmpmask(temp, WRITEMASK_W),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z));
emit1(t->Program, OPCODE_EX2, 0,
emit1(c, inst->Prev, OPCODE_EX2, 0,
dstregtmpmask(temp, WRITEMASK_W),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
// tmp.z = (tmp.x > 0) ? tmp.w : 0.0
emit3(t->Program, OPCODE_CMP, inst->SaturateMode,
emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode,
dstregtmpmask(temp, WRITEMASK_Z),
negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
builtin_zero);
// tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
emit1(t->Program, OPCODE_MOV, inst->SaturateMode,
emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode,
dstregtmpmask(temp, WRITEMASK_XYW),
swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE));
if (needTemporary)
emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp);
rc_remove_instruction(inst);
}
/*
* Replace LRP by two instructions using a free temporary:
*   ADD tmp, src1, -src2
*   MAD dst, src0, tmp, src2
* i.e. dst = src0 * (src1 - src2) + src2. The saturate mode is applied to
* the MAD only.
*/
static void transform_LRP(struct radeon_transform_context* t,
struct prog_instruction* inst)
static void transform_LRP(struct radeon_compiler* c,
struct rc_instruction* inst)
{
int tempreg = radeonFindFreeTemporary(t);
int tempreg = rc_find_free_temporary(c);
emit2(t->Program, OPCODE_ADD, 0,
emit2(c, inst->Prev, OPCODE_ADD, 0,
dstreg(PROGRAM_TEMPORARY, tempreg),
inst->SrcReg[1], negate(inst->SrcReg[2]));
emit3(t->Program, OPCODE_MAD, inst->SaturateMode,
inst->DstReg,
inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]);
inst->I.SrcReg[1], negate(inst->I.SrcReg[2]));
emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode,
inst->I.DstReg,
inst->I.SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[2]);
rc_remove_instruction(inst);
}
/*
* Replace POW by LG2 / MUL / EX2, using the W component of a free
* temporary as scratch:
*   LG2 tmp.w, scalar(src0)
*   MUL tmp.w, tmp.w, scalar(src1)
*   EX2 dst, tmp.w
* The saturate mode is applied to the final EX2 only.
*/
static void transform_POW(struct radeon_transform_context* t,
struct prog_instruction* inst)
static void transform_POW(struct radeon_compiler* c,
struct rc_instruction* inst)
{
int tempreg = radeonFindFreeTemporary(t);
int tempreg = rc_find_free_temporary(c);
struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);
struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);
tempdst.WriteMask = WRITEMASK_W;
tempsrc.Swizzle = SWIZZLE_WWWW;
emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0]));
emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1]));
emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc);
emit1(c, inst->Prev, OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0]));
emit2(c, inst->Prev, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1]));
emit1(c, inst->Prev, OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc);
rc_remove_instruction(inst);
}
/*
* Make RSQ operate on the absolute value of its source. The
* prog_instruction form emits a new RSQ; the rc_instruction form rewrites
* the source operand of the existing instruction in place.
*/
static void transform_RSQ(struct radeon_transform_context* t,
struct prog_instruction* inst)
static void transform_RSQ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0]));
inst->I.SrcReg[0] = absolute(inst->I.SrcReg[0]);
}
/*
* Replace SGE by two instructions using a free temporary:
*   ADD tmp, src0, -src1
*   CMP dst, tmp, 0, 1
* NOTE(review): presumably CMP selects its second operand where the first
* is negative, giving 0 when src0 < src1 and 1 otherwise - confirm against
* the CMP definition.
*/
static void transform_SGE(struct radeon_transform_context* t,
struct prog_instruction* inst)
static void transform_SGE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
int tempreg = radeonFindFreeTemporary(t);
int tempreg = rc_find_free_temporary(c);
emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1]));
emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg,
srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
rc_remove_instruction(inst);
}
/*
* Replace SLT by two instructions using a free temporary:
*   ADD tmp, src0, -src1
*   CMP dst, tmp, 1, 0
* Same shape as transform_SGE with the two CMP result operands swapped.
*/
static void transform_SLT(struct radeon_transform_context* t,
struct prog_instruction* inst)
static void transform_SLT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
int tempreg = radeonFindFreeTemporary(t);
int tempreg = rc_find_free_temporary(c);
emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1]));
emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg,
srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
rc_remove_instruction(inst);
}
/*
* Turn SUB into ADD with the second source negated. The prog_instruction
* form emits a new ADD; the rc_instruction form changes the opcode and the
* second source of the existing instruction in place.
*/
static void transform_SUB(struct radeon_transform_context* t,
struct prog_instruction* inst)
static void transform_SUB(struct radeon_compiler* c,
struct rc_instruction* inst)
{
emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));
inst->I.Opcode = OPCODE_ADD;
inst->I.SrcReg[1] = negate(inst->I.SrcReg[1]);
}
/*
* Turn SWZ into MOV, keeping the source operand as it is. The
* prog_instruction form emits a new MOV; the rc_instruction form only
* changes the opcode of the existing instruction.
*/
static void transform_SWZ(struct radeon_transform_context* t,
struct prog_instruction* inst)
static void transform_SWZ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]);
inst->I.Opcode = OPCODE_MOV;
}
/*
* Replace XPD (cross product) by two instructions using a free temporary:
*   MUL tmp, src0.zxyw, src1.yzxw
*   MAD dst, src0.yzxw, src1.zxyw, -tmp
* The saturate mode is applied to the MAD only.
*/
static void transform_XPD(struct radeon_transform_context* t,
struct prog_instruction* inst)
static void transform_XPD(struct radeon_compiler* c,
struct rc_instruction* inst)
{
int tempreg = radeonFindFreeTemporary(t);
int tempreg = rc_find_free_temporary(c);
emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg,
swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
emit2(c, inst->Prev, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
swizzle(inst->I.SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
swizzle(inst->I.SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg,
swizzle(inst->I.SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
swizzle(inst->I.SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
rc_remove_instruction(inst);
}
@ -406,63 +421,64 @@ static void transform_XPD(struct radeon_transform_context* t,
*
* @note should be applicable to R300 and R500 fragment programs.
*/
GLboolean radeonTransformALU(struct radeon_transform_context* t,
struct prog_instruction* inst,
GLboolean radeonTransformALU(
struct radeon_compiler * c,
struct rc_instruction* inst,
void* unused)
{
/*
* Dispatch on the opcode: every opcode listed below is handed to its
* transform_* helper and GL_TRUE is returned; any other opcode is left
* alone and GL_FALSE is returned. The third parameter is not used.
*/
switch(inst->Opcode) {
case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;
case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
case OPCODE_DST: transform_DST(t, inst); return GL_TRUE;
case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE;
case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;
case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;
case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE;
case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;
case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;
case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
switch(inst->I.Opcode) {
case OPCODE_ABS: transform_ABS(c, inst); return GL_TRUE;
case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE;
case OPCODE_DST: transform_DST(c, inst); return GL_TRUE;
case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE;
case OPCODE_LIT: transform_LIT(c, inst); return GL_TRUE;
case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE;
case OPCODE_POW: transform_POW(c, inst); return GL_TRUE;
case OPCODE_RSQ: transform_RSQ(c, inst); return GL_TRUE;
case OPCODE_SGE: transform_SGE(c, inst); return GL_TRUE;
case OPCODE_SLT: transform_SLT(c, inst); return GL_TRUE;
case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE;
case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE;
case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE;
default:
return GL_FALSE;
}
}
/*
* Vertex-program ABS without an absolute-value source modifier: computes
* MAX(src, -src). The prog_instruction form emits a new MAX; the
* rc_instruction form rewrites the existing instruction in place by
* changing the opcode and setting the second source to the first source
* with all four negate bits flipped.
*/
static void transform_r300_vertex_ABS(struct radeon_transform_context* t,
struct prog_instruction* inst)
static void transform_r300_vertex_ABS(struct radeon_compiler* c,
struct rc_instruction* inst)
{
/* Note: r500 can take absolute values, but r300 cannot. */
struct prog_src_register src1 = inst->SrcReg[0];
src1.Negate ^= NEGATE_XYZW;
emit2(t->Program, OPCODE_MAX, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], src1);
inst->I.Opcode = OPCODE_MAX;
inst->I.SrcReg[1] = inst->I.SrcReg[0];
inst->I.SrcReg[1].Negate ^= NEGATE_XYZW;
}
/**
* For use with radeonLocalTransform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
*
* Returns GL_TRUE when the opcode is one of those listed in the switch
* (the matching helper has then been run on the instruction) and GL_FALSE
* for every other opcode. The third parameter is not used.
*/
GLboolean r300_transform_vertex_alu(struct radeon_transform_context* t,
struct prog_instruction* inst,
GLboolean r300_transform_vertex_alu(
struct radeon_compiler * c,
struct rc_instruction* inst,
void* unused)
{
switch(inst->Opcode) {
case OPCODE_ABS: transform_r300_vertex_ABS(t, inst); return GL_TRUE;
case OPCODE_DP3: transform_DP3(t, inst); return GL_TRUE;
case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;
case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
switch(inst->I.Opcode) {
case OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return GL_TRUE;
case OPCODE_DP3: transform_DP3(c, inst); return GL_TRUE;
case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE;
case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE;
case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE;
case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE;
case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE;
case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE;
default:
return GL_FALSE;
}
}
static void sincos_constants(struct radeon_transform_context* t, GLuint *constants)
static void sincos_constants(struct radeon_compiler* c, GLuint *constants)
{
static const GLfloat SinCosConsts[2][4] = {
{
@ -480,11 +496,8 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan
};
int i;
for(i = 0; i < 2; ++i) {
GLuint swz;
constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz);
ASSERT(swz == SWIZZLE_NOOP);
}
for(i = 0; i < 2; ++i)
constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]);
}
/**
@ -495,23 +508,24 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan
* MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
* MAD dest, tmp.y, weight, tmp.x
*/
static void sin_approx(struct radeon_transform_context* t,
static void sin_approx(
struct radeon_compiler* c, struct rc_instruction * after,
struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants)
{
/*
* Emits the MUL / MAD / MAD / MAD sequence described above, using a free
* temporary for tmp.x and tmp.y, and writes the result to dst.
*
* In the rc_instruction form the four instructions are placed directly
* after `after`, in program order: each emit call returns the instruction
* it created, which becomes the insertion point for the next one.
* Callers pass the last instruction they emitted themselves (their
* inst->Prev). Inserting at after->Prev instead would put the
* approximation in front of that instruction, i.e. before the value it
* consumes has been computed.
*/
GLuint tempreg = radeonFindFreeTemporary(t);
GLuint tempreg = rc_find_free_temporary(c);
emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
after = emit2(c, after, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
srcreg(PROGRAM_CONSTANT, constants[0]));
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
after = emit3(c, after, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
after = emit3(c, after, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));
emit3(t->Program, OPCODE_MAD, 0, dst,
emit3(c, after, OPCODE_MAD, 0, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
@ -522,78 +536,80 @@ static void sin_approx(struct radeon_transform_context* t,
* using only the basic instructions
* MOV, ADD, MUL, MAD, FRC
*/
GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
struct prog_instruction* inst,
GLboolean radeonTransformTrigSimple(struct radeon_compiler* c,
struct rc_instruction* inst,
void* unused)
{
/*
* Handles COS, SIN and SCS only; any other opcode returns GL_FALSE
* untouched. Each branch first emits MAD / FRC / MAD on a temporary and
* then hands the result to sin_approx(), which writes the final
* destination. The third parameter is not used.
*/
if (inst->Opcode != OPCODE_COS &&
inst->Opcode != OPCODE_SIN &&
inst->Opcode != OPCODE_SCS)
if (inst->I.Opcode != OPCODE_COS &&
inst->I.Opcode != OPCODE_SIN &&
inst->I.Opcode != OPCODE_SCS)
return GL_FALSE;
GLuint constants[2];
GLuint tempreg = radeonFindFreeTemporary(t);
GLuint tempreg = rc_find_free_temporary(c);
sincos_constants(t, constants);
sincos_constants(c, constants);
if (inst->Opcode == OPCODE_COS) {
if (inst->I.Opcode == OPCODE_COS) {
// MAD tmp.w, src, 1/(2*PI), 0.75
// FRC tmp.w, tmp.w
// MAD tmp.w, tmp.w, 2*PI, -PI
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
sin_approx(t, inst->DstReg,
sin_approx(c, inst->Prev, inst->I.DstReg,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
constants);
} else if (inst->Opcode == OPCODE_SIN) {
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
} else if (inst->I.Opcode == OPCODE_SIN) {
// Same sequence as COS, except that the first MAD adds the Y component
// of constants[1] instead of the X component.
emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
sin_approx(t, inst->DstReg,
sin_approx(c, inst->Prev, inst->I.DstReg,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
constants);
} else {
// SCS: tmp.x and tmp.y are prepared together (the first MAD adds
// constants[1] unswizzled); sin_approx() then runs once per requested
// destination component, from tmp.x into dst.x and from tmp.y into dst.y.
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));
emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
srcreg(PROGRAM_TEMPORARY, tempreg));
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
srcreg(PROGRAM_TEMPORARY, tempreg),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
struct prog_dst_register dst = inst->DstReg;
struct prog_dst_register dst = inst->I.DstReg;
dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X;
sin_approx(t, dst,
dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_X;
sin_approx(c, inst->Prev, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
constants);
dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y;
sin_approx(t, dst,
dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_Y;
sin_approx(c, inst->Prev, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
constants);
}
rc_remove_instruction(inst);
return GL_TRUE;
}
@ -606,50 +622,52 @@ GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
*
* @warning This transformation implicitly changes the semantics of SIN and COS!
*/
GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
struct prog_instruction* inst,
GLboolean radeonTransformTrigScale(struct radeon_compiler* c,
struct rc_instruction* inst,
void* unused)
{
/*
* Handles COS, SIN and SCS only; any other opcode returns GL_FALSE
* untouched. The X component of the source is multiplied by 1/(2*PI) and
* reduced with FRC into tmp.w; the trig instruction is then re-emitted
* reading tmp.w. The third parameter is not used.
*/
if (inst->Opcode != OPCODE_COS &&
inst->Opcode != OPCODE_SIN &&
inst->Opcode != OPCODE_SCS)
if (inst->I.Opcode != OPCODE_COS &&
inst->I.Opcode != OPCODE_SIN &&
inst->I.Opcode != OPCODE_SCS)
return GL_FALSE;
static const GLfloat RCP_2PI[] = { 0.15915494309189535 };
static const GLfloat RCP_2PI = 0.15915494309189535;
GLuint temp;
GLuint constant;
GLuint constant_swizzle;
temp = radeonFindFreeTemporary(t);
constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle);
temp = rc_find_free_temporary(c);
constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle));
emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
srcreg(PROGRAM_TEMPORARY, temp));
if (inst->Opcode == OPCODE_COS) {
emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg,
if (inst->I.Opcode == OPCODE_COS) {
emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg,
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
} else if (inst->Opcode == OPCODE_SIN) {
emit1(t->Program, OPCODE_SIN, inst->SaturateMode,
inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
} else if (inst->Opcode == OPCODE_SCS) {
struct prog_dst_register moddst = inst->DstReg;
} else if (inst->I.Opcode == OPCODE_SIN) {
emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode,
inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
} else if (inst->I.Opcode == OPCODE_SCS) {
struct prog_dst_register moddst = inst->I.DstReg;
/* SCS is split: COS into dst.x and SIN into dst.y, each emitted only */
/* when the original write mask contains that component. */
if (inst->DstReg.WriteMask & WRITEMASK_X) {
if (inst->I.DstReg.WriteMask & WRITEMASK_X) {
moddst.WriteMask = WRITEMASK_X;
emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst,
emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, moddst,
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
}
if (inst->DstReg.WriteMask & WRITEMASK_Y) {
if (inst->I.DstReg.WriteMask & WRITEMASK_Y) {
moddst.WriteMask = WRITEMASK_Y;
emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst,
emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, moddst,
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
}
}
rc_remove_instruction(inst);
return GL_TRUE;
}
@ -661,21 +679,15 @@ GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
* @warning This explicitly changes the form of DDX and DDY!
*/
GLboolean radeonTransformDeriv(struct radeon_transform_context* t,
struct prog_instruction* inst,
GLboolean radeonTransformDeriv(struct radeon_compiler* c,
struct rc_instruction* inst,
void* unused)
{
/*
* Handles DDX and DDY only; any other opcode returns GL_FALSE untouched.
* The second source operand is set to SWIZZLE_ONE in every component with
* all four negate bits set. The prog_instruction form emits a new
* instruction with that operand; the rc_instruction form rewrites the
* operand of the existing instruction in place. The third parameter is
* not used.
*/
if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY)
if (inst->I.Opcode != OPCODE_DDX && inst->I.Opcode != OPCODE_DDY)
return GL_FALSE;
struct prog_src_register B = inst->SrcReg[1];
B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE,
SWIZZLE_ONE, SWIZZLE_ONE);
B.Negate = NEGATE_XYZW;
emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg,
inst->SrcReg[0], B);
inst->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE);
inst->I.SrcReg[1].Negate = NEGATE_XYZW;
return GL_TRUE;
}

View file

@ -31,28 +31,28 @@
#include "radeon_program.h"
/*
 * Local instruction transforms.
 *
 * In this listing every prototype shows two parameter lists back to back:
 * the (radeon_transform_context *, prog_instruction *) pair and the
 * (radeon_compiler *, rc_instruction *) pair that supersedes it. All five
 * functions share one shape: a context, the instruction under consideration,
 * and an opaque pointer, returning a GLboolean.
 *
 * NOTE(review): radeonTransformDeriv returns GL_TRUE after rewriting the
 * instruction and GL_FALSE when the opcode is not one it handles; presumably
 * the other transforms follow the same convention -- confirm against their
 * definitions.
 */
GLboolean radeonTransformALU(
struct radeon_transform_context *t,
struct prog_instruction*,
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
GLboolean r300_transform_vertex_alu(
struct radeon_transform_context *t,
struct prog_instruction*,
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
GLboolean radeonTransformTrigSimple(
struct radeon_transform_context *t,
struct prog_instruction*,
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
GLboolean radeonTransformTrigScale(
struct radeon_transform_context *t,
struct prog_instruction*,
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
GLboolean radeonTransformDeriv(
struct radeon_transform_context *t,
struct prog_instruction*,
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
#endif /* __RADEON_PROGRAM_ALU_H_ */

View file

@ -1063,24 +1063,6 @@ r300FetchStateParameter(GLcontext * ctx,
break;
}
case STATE_R300_TEXRECT_FACTOR:{
struct gl_texture_object *t =
ctx->Texture.Unit[state[2]].CurrentTex[TEXTURE_RECT_INDEX];
if (t && t->Image[0][t->BaseLevel]) {
struct gl_texture_image *image =
t->Image[0][t->BaseLevel];
value[0] = 1.0 / image->Width2;
value[1] = 1.0 / image->Height2;
} else {
value[0] = 1.0;
value[1] = 1.0;
}
value[2] = 1.0;
value[3] = 1.0;
break;
}
default:
break;
}
@ -2029,7 +2011,7 @@ void r300UpdateShaders(r300ContextPtr rmesa)
rmesa->radeon.NewGLState = 0;
}
static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index)
static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index, GLfloat * buffer)
{
static const GLfloat dummy[4] = { 0, 0, 0, 0 };
r300ContextPtr rmesa = R300_CONTEXT(ctx);
@ -2041,6 +2023,47 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index)
return fp->Base->Parameters->ParameterValues[rcc->u.External];
case RC_CONSTANT_IMMEDIATE:
return rcc->u.Immediate;
case RC_CONSTANT_STATE:
switch(rcc->u.State[0]) {
case RC_STATE_SHADOW_AMBIENT: {
	/* State[1] selects the texture unit whose compare-fail value is read. */
	const int unit = (int) rcc->u.State[1];
	const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
	/*
	 * The callers hand in an uninitialized stack array, so the buffer
	 * must be written on every path before it is returned. With no
	 * texture object current on the unit, fall back to 0.0 (the same
	 * value the all-zero dummy constant would supply) instead of
	 * leaking indeterminate stack contents into the shader constants.
	 */
	const GLfloat failValue = texObj ? texObj->CompareFailValue : 0.0f;
	buffer[0] = failValue;
	buffer[1] = failValue;
	buffer[2] = failValue;
	buffer[3] = failValue;
	return buffer;
}
case RC_STATE_R300_WINDOW_DIMENSION: {
__DRIdrawablePrivate * drawable = radeon_get_drawable(&rmesa->radeon);
buffer[0] = drawable->w * 0.5f; /* width*0.5 */
buffer[1] = drawable->h * 0.5f; /* height*0.5 */
buffer[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */
buffer[3] = 1.0F; /* not used */
return buffer;
}
case RC_STATE_R300_TEXRECT_FACTOR: {
struct gl_texture_object *t =
ctx->Texture.Unit[rcc->u.State[1]].CurrentTex[TEXTURE_RECT_INDEX];
if (t && t->Image[0][t->BaseLevel]) {
struct gl_texture_image *image =
t->Image[0][t->BaseLevel];
buffer[0] = 1.0 / image->Width2;
buffer[1] = 1.0 / image->Height2;
} else {
buffer[0] = 1.0;
buffer[1] = 1.0;
}
buffer[2] = 1.0;
buffer[3] = 1.0;
return buffer;
}
}
}
return dummy;
@ -2096,7 +2119,8 @@ static void r300SetupPixelShader(GLcontext *ctx)
R300_STATECHANGE(rmesa, fpp);
rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, fp->code.constants.Count * 4);
for (i = 0; i < fp->code.constants.Count; i++) {
const GLfloat *constant = get_fragmentprogram_constant(ctx, i);
GLfloat buffer[4];
const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]);
@ -2157,7 +2181,8 @@ static void r500SetupPixelShader(GLcontext *ctx)
R300_STATECHANGE(rmesa, r500fp_const);
for (i = 0; i < fp->code.constants.Count; i++) {
const GLfloat *constant = get_fragmentprogram_constant(ctx, i);
GLfloat buffer[4];
const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]);