mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 13:58:04 +02:00
r300/compiler: Refactor local transforms to use rc_program
Signed-off-by: Nicolai Hähnle <nhaehnle@gmail.com>
This commit is contained in:
parent
800f482586
commit
6f4608f53c
14 changed files with 660 additions and 642 deletions
|
|
@ -31,16 +31,12 @@
|
|||
|
||||
#include "../r300_reg.h"
|
||||
|
||||
static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
|
||||
static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
|
||||
{
|
||||
gl_state_index fail_value_tokens[STATE_LENGTH] = {
|
||||
STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
|
||||
};
|
||||
struct prog_src_register reg = { 0, };
|
||||
|
||||
fail_value_tokens[2] = tmu;
|
||||
reg.File = PROGRAM_STATE_VAR;
|
||||
reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
|
||||
reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
|
||||
reg.Swizzle = SWIZZLE_WWWW;
|
||||
return reg;
|
||||
}
|
||||
|
|
@ -50,173 +46,146 @@ static struct prog_src_register shadow_ambient(struct gl_program *program, int t
|
|||
* - premultiply texture coordinates for RECT
|
||||
* - extract operand swizzles
|
||||
* - introduce a temporary register when write masks are needed
|
||||
*
|
||||
* \todo If/when r5xx uses the radeon_program architecture, this can probably
|
||||
* be reused.
|
||||
*/
|
||||
GLboolean r300_transform_TEX(
|
||||
struct radeon_transform_context *t,
|
||||
struct prog_instruction* orig_inst, void* data)
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction* inst,
|
||||
void* data)
|
||||
{
|
||||
struct r300_fragment_program_compiler *compiler =
|
||||
(struct r300_fragment_program_compiler*)data;
|
||||
struct prog_instruction inst = *orig_inst;
|
||||
struct prog_instruction* tgt;
|
||||
GLboolean destredirect = GL_FALSE;
|
||||
|
||||
if (inst.Opcode != OPCODE_TEX &&
|
||||
inst.Opcode != OPCODE_TXB &&
|
||||
inst.Opcode != OPCODE_TXP &&
|
||||
inst.Opcode != OPCODE_KIL)
|
||||
if (inst->I.Opcode != OPCODE_TEX &&
|
||||
inst->I.Opcode != OPCODE_TXB &&
|
||||
inst->I.Opcode != OPCODE_TXP &&
|
||||
inst->I.Opcode != OPCODE_KIL)
|
||||
return GL_FALSE;
|
||||
|
||||
if (inst.Opcode != OPCODE_KIL &&
|
||||
t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
|
||||
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
|
||||
/* ARB_shadow & EXT_shadow_funcs */
|
||||
if (inst->I.Opcode != OPCODE_KIL &&
|
||||
c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
|
||||
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
|
||||
|
||||
if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
|
||||
tgt = radeonAppendInstructions(t->Program, 1);
|
||||
inst->I.Opcode = OPCODE_MOV;
|
||||
|
||||
tgt->Opcode = OPCODE_MOV;
|
||||
tgt->DstReg = inst.DstReg;
|
||||
if (comparefunc == GL_ALWAYS) {
|
||||
tgt->SrcReg[0].File = PROGRAM_BUILTIN;
|
||||
tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
|
||||
inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
|
||||
inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
|
||||
} else {
|
||||
tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
|
||||
inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
|
||||
}
|
||||
|
||||
return GL_TRUE;
|
||||
} else {
|
||||
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
|
||||
GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
|
||||
struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
|
||||
struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
|
||||
struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
|
||||
int pass, fail;
|
||||
|
||||
inst_rcp->I.Opcode = OPCODE_RCP;
|
||||
inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
|
||||
inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
|
||||
inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
|
||||
inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
|
||||
|
||||
inst_cmp->I.DstReg = inst->I.DstReg;
|
||||
inst->I.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst->I.DstReg.Index = rc_find_free_temporary(c);
|
||||
inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
|
||||
|
||||
inst_mad->I.Opcode = OPCODE_MAD;
|
||||
inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
|
||||
inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
|
||||
inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
|
||||
inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
|
||||
inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
|
||||
inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
|
||||
inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
|
||||
inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
|
||||
if (depthmode == 0) /* GL_LUMINANCE */
|
||||
inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
|
||||
else if (depthmode == 2) /* GL_ALPHA */
|
||||
inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;
|
||||
|
||||
/* Recall that SrcReg[0] is r, SrcReg[2] is tex and:
|
||||
* r < tex <=> -tex+r < 0
|
||||
* r >= tex <=> not (-tex+r < 0) */
|
||||
if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
|
||||
inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
|
||||
else
|
||||
inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;
|
||||
|
||||
inst_cmp->I.Opcode = OPCODE_CMP;
|
||||
/* DstReg has been filled out above */
|
||||
inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;
|
||||
|
||||
if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
|
||||
pass = 1;
|
||||
fail = 2;
|
||||
} else {
|
||||
pass = 2;
|
||||
fail = 1;
|
||||
}
|
||||
|
||||
inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
|
||||
inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
|
||||
inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
|
||||
}
|
||||
|
||||
inst.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst.DstReg.Index = radeonFindFreeTemporary(t);
|
||||
inst.DstReg.WriteMask = WRITEMASK_XYZW;
|
||||
}
|
||||
|
||||
|
||||
/* Hardware uses [0..1]x[0..1] range for rectangle textures
|
||||
* instead of [0..Width]x[0..Height].
|
||||
* Add a scaling instruction.
|
||||
*/
|
||||
if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) {
|
||||
gl_state_index tokens[STATE_LENGTH] = {
|
||||
STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
|
||||
0
|
||||
};
|
||||
if (inst->I.Opcode != OPCODE_KIL && inst->I.TexSrcTarget == TEXTURE_RECT_INDEX) {
|
||||
struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev);
|
||||
|
||||
int tempreg = radeonFindFreeTemporary(t);
|
||||
int factor_index;
|
||||
inst_mul->I.Opcode = OPCODE_MUL;
|
||||
inst_mul->I.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst_mul->I.DstReg.Index = rc_find_free_temporary(c);
|
||||
inst_mul->I.SrcReg[0] = inst->I.SrcReg[0];
|
||||
inst_mul->I.SrcReg[1].File = PROGRAM_STATE_VAR;
|
||||
inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit);
|
||||
|
||||
tokens[2] = inst.TexSrcUnit;
|
||||
factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens);
|
||||
|
||||
tgt = radeonAppendInstructions(t->Program, 1);
|
||||
|
||||
tgt->Opcode = OPCODE_MUL;
|
||||
tgt->DstReg.File = PROGRAM_TEMPORARY;
|
||||
tgt->DstReg.Index = tempreg;
|
||||
tgt->SrcReg[0] = inst.SrcReg[0];
|
||||
tgt->SrcReg[1].File = PROGRAM_STATE_VAR;
|
||||
tgt->SrcReg[1].Index = factor_index;
|
||||
|
||||
reset_srcreg(&inst.SrcReg[0]);
|
||||
inst.SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
inst.SrcReg[0].Index = tempreg;
|
||||
reset_srcreg(&inst->I.SrcReg[0]);
|
||||
inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index;
|
||||
}
|
||||
|
||||
if (inst.Opcode != OPCODE_KIL) {
|
||||
if (inst.DstReg.File != PROGRAM_TEMPORARY ||
|
||||
inst.DstReg.WriteMask != WRITEMASK_XYZW) {
|
||||
int tempreg = radeonFindFreeTemporary(t);
|
||||
/* Cannot write texture to output registers or with masks */
|
||||
if (inst->I.Opcode != OPCODE_KIL &&
|
||||
(inst->I.DstReg.File != PROGRAM_TEMPORARY || inst->I.DstReg.WriteMask != WRITEMASK_XYZW)) {
|
||||
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
|
||||
|
||||
inst.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst.DstReg.Index = tempreg;
|
||||
inst.DstReg.WriteMask = WRITEMASK_XYZW;
|
||||
destredirect = GL_TRUE;
|
||||
} else if (inst.SaturateMode) {
|
||||
destredirect = GL_TRUE;
|
||||
}
|
||||
inst_mov->I.Opcode = OPCODE_MOV;
|
||||
inst_mov->I.DstReg = inst->I.DstReg;
|
||||
inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);
|
||||
|
||||
inst->I.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
|
||||
inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
|
||||
}
|
||||
|
||||
if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
|
||||
int tmpreg = radeonFindFreeTemporary(t);
|
||||
tgt = radeonAppendInstructions(t->Program, 1);
|
||||
tgt->Opcode = OPCODE_MOV;
|
||||
tgt->DstReg.File = PROGRAM_TEMPORARY;
|
||||
tgt->DstReg.Index = tmpreg;
|
||||
tgt->SrcReg[0] = inst.SrcReg[0];
|
||||
|
||||
reset_srcreg(&inst.SrcReg[0]);
|
||||
inst.SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
inst.SrcReg[0].Index = tmpreg;
|
||||
}
|
||||
/* Cannot read texture coordinate from constants file */
|
||||
if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
|
||||
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
|
||||
|
||||
tgt = radeonAppendInstructions(t->Program, 1);
|
||||
_mesa_copy_instructions(tgt, &inst, 1);
|
||||
inst_mov->I.Opcode = OPCODE_MOV;
|
||||
inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
|
||||
inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
|
||||
|
||||
if (inst.Opcode != OPCODE_KIL &&
|
||||
t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
|
||||
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
|
||||
GLuint depthmode = compiler->state.unit[inst.TexSrcUnit].depth_texture_mode;
|
||||
int rcptemp = radeonFindFreeTemporary(t);
|
||||
int pass, fail;
|
||||
|
||||
tgt = radeonAppendInstructions(t->Program, 3);
|
||||
|
||||
tgt[0].Opcode = OPCODE_RCP;
|
||||
tgt[0].DstReg.File = PROGRAM_TEMPORARY;
|
||||
tgt[0].DstReg.Index = rcptemp;
|
||||
tgt[0].DstReg.WriteMask = WRITEMASK_W;
|
||||
tgt[0].SrcReg[0] = inst.SrcReg[0];
|
||||
tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
|
||||
|
||||
tgt[1].Opcode = OPCODE_MAD;
|
||||
tgt[1].DstReg = inst.DstReg;
|
||||
tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
|
||||
tgt[1].SrcReg[0] = inst.SrcReg[0];
|
||||
tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
|
||||
tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
|
||||
tgt[1].SrcReg[1].Index = rcptemp;
|
||||
tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
|
||||
tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
|
||||
tgt[1].SrcReg[2].Index = inst.DstReg.Index;
|
||||
if (depthmode == 0) /* GL_LUMINANCE */
|
||||
tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
|
||||
else if (depthmode == 2) /* GL_ALPHA */
|
||||
tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
|
||||
|
||||
/* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
|
||||
* r < tex <=> -tex+r < 0
|
||||
* r >= tex <=> not (-tex+r < 0 */
|
||||
if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
|
||||
tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
|
||||
else
|
||||
tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
|
||||
|
||||
tgt[2].Opcode = OPCODE_CMP;
|
||||
tgt[2].DstReg = orig_inst->DstReg;
|
||||
tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
|
||||
|
||||
if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
|
||||
pass = 1;
|
||||
fail = 2;
|
||||
} else {
|
||||
pass = 2;
|
||||
fail = 1;
|
||||
}
|
||||
|
||||
tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
|
||||
tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
|
||||
tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
|
||||
} else if (destredirect) {
|
||||
tgt = radeonAppendInstructions(t->Program, 1);
|
||||
|
||||
tgt->Opcode = OPCODE_MOV;
|
||||
tgt->DstReg = orig_inst->DstReg;
|
||||
tgt->SaturateMode = inst.SaturateMode;
|
||||
tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
tgt->SrcReg[0].Index = inst.DstReg.Index;
|
||||
reset_srcreg(&inst->I.SrcReg[0]);
|
||||
inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
|
||||
}
|
||||
|
||||
return GL_TRUE;
|
||||
|
|
|
|||
|
|
@ -44,6 +44,6 @@ extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler
|
|||
|
||||
extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c);
|
||||
|
||||
extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data);
|
||||
extern GLboolean r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -250,6 +250,8 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
|
|||
|
||||
rewrite_depth_out(c->program);
|
||||
|
||||
rc_mesa_to_rc_program(&c->Base, c->program);
|
||||
|
||||
if (c->is_r500) {
|
||||
struct radeon_program_transformation transformations[] = {
|
||||
{ &r500_transform_TEX, c },
|
||||
|
|
@ -257,24 +259,22 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
|
|||
{ &radeonTransformDeriv, 0 },
|
||||
{ &radeonTransformTrigScale, 0 }
|
||||
};
|
||||
radeonLocalTransform(c->program, 4, transformations);
|
||||
radeonLocalTransform(&c->Base, 4, transformations);
|
||||
} else {
|
||||
struct radeon_program_transformation transformations[] = {
|
||||
{ &r300_transform_TEX, c },
|
||||
{ &radeonTransformALU, 0 },
|
||||
{ &radeonTransformTrigSimple, 0 }
|
||||
};
|
||||
radeonLocalTransform(c->program, 3, transformations);
|
||||
radeonLocalTransform(&c->Base, 3, transformations);
|
||||
}
|
||||
|
||||
if (c->Base.Debug) {
|
||||
_mesa_printf("Fragment Program: After native rewrite:\n");
|
||||
_mesa_print_program(c->program);
|
||||
rc_print_program(&c->Base.Program);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
rc_mesa_to_rc_program(&c->Base, c->program);
|
||||
|
||||
if (c->is_r500) {
|
||||
struct radeon_nqssadce_descr nqssadce = {
|
||||
.Init = &nqssadce_init,
|
||||
|
|
|
|||
|
|
@ -538,47 +538,43 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
|
|||
* Introduce intermediate MOVs to temporary registers to account for this.
|
||||
*/
|
||||
static GLboolean transform_source_conflicts(
|
||||
struct radeon_transform_context *t,
|
||||
struct prog_instruction* orig_inst,
|
||||
struct radeon_compiler *c,
|
||||
struct rc_instruction* inst,
|
||||
void* unused)
|
||||
{
|
||||
struct prog_instruction inst = *orig_inst;
|
||||
struct prog_instruction * dst;
|
||||
GLuint num_operands = _mesa_num_inst_src_regs(inst.Opcode);
|
||||
GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode);
|
||||
|
||||
if (num_operands == 3) {
|
||||
if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[2])
|
||||
|| t_src_conflict(inst.SrcReg[0], inst.SrcReg[2])) {
|
||||
int tmpreg = radeonFindFreeTemporary(t);
|
||||
struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1);
|
||||
inst_mov->Opcode = OPCODE_MOV;
|
||||
inst_mov->DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst_mov->DstReg.Index = tmpreg;
|
||||
inst_mov->SrcReg[0] = inst.SrcReg[2];
|
||||
if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2])
|
||||
|| t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) {
|
||||
int tmpreg = rc_find_free_temporary(c);
|
||||
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
|
||||
inst_mov->I.Opcode = OPCODE_MOV;
|
||||
inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst_mov->I.DstReg.Index = tmpreg;
|
||||
inst_mov->I.SrcReg[0] = inst->I.SrcReg[2];
|
||||
|
||||
reset_srcreg(&inst.SrcReg[2]);
|
||||
inst.SrcReg[2].File = PROGRAM_TEMPORARY;
|
||||
inst.SrcReg[2].Index = tmpreg;
|
||||
reset_srcreg(&inst->I.SrcReg[2]);
|
||||
inst->I.SrcReg[2].File = PROGRAM_TEMPORARY;
|
||||
inst->I.SrcReg[2].Index = tmpreg;
|
||||
}
|
||||
}
|
||||
|
||||
if (num_operands >= 2) {
|
||||
if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[0])) {
|
||||
int tmpreg = radeonFindFreeTemporary(t);
|
||||
struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1);
|
||||
inst_mov->Opcode = OPCODE_MOV;
|
||||
inst_mov->DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst_mov->DstReg.Index = tmpreg;
|
||||
inst_mov->SrcReg[0] = inst.SrcReg[1];
|
||||
if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) {
|
||||
int tmpreg = rc_find_free_temporary(c);
|
||||
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
|
||||
inst_mov->I.Opcode = OPCODE_MOV;
|
||||
inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst_mov->I.DstReg.Index = tmpreg;
|
||||
inst_mov->I.SrcReg[0] = inst->I.SrcReg[1];
|
||||
|
||||
reset_srcreg(&inst.SrcReg[1]);
|
||||
inst.SrcReg[1].File = PROGRAM_TEMPORARY;
|
||||
inst.SrcReg[1].Index = tmpreg;
|
||||
reset_srcreg(&inst->I.SrcReg[1]);
|
||||
inst->I.SrcReg[1].File = PROGRAM_TEMPORARY;
|
||||
inst->I.SrcReg[1].Index = tmpreg;
|
||||
}
|
||||
}
|
||||
|
||||
dst = radeonAppendInstructions(t->Program, 1);
|
||||
*dst = inst;
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
|
|
@ -782,16 +778,18 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
|
|||
|
||||
addArtificialOutputs(compiler);
|
||||
|
||||
rc_mesa_to_rc_program(&compiler->Base, compiler->program);
|
||||
|
||||
{
|
||||
struct radeon_program_transformation transformations[] = {
|
||||
{ &r300_transform_vertex_alu, 0 },
|
||||
};
|
||||
radeonLocalTransform(compiler->program, 1, transformations);
|
||||
radeonLocalTransform(&compiler->Base, 1, transformations);
|
||||
}
|
||||
|
||||
if (compiler->Base.Debug) {
|
||||
fprintf(stderr, "Vertex program after native rewrite:\n");
|
||||
_mesa_print_program(compiler->program);
|
||||
rc_print_program(&compiler->Base.Program);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
|
|
@ -803,17 +801,15 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
|
|||
struct radeon_program_transformation transformations[] = {
|
||||
{ &transform_source_conflicts, 0 },
|
||||
};
|
||||
radeonLocalTransform(compiler->program, 1, transformations);
|
||||
radeonLocalTransform(&compiler->Base, 1, transformations);
|
||||
}
|
||||
|
||||
if (compiler->Base.Debug) {
|
||||
fprintf(stderr, "Vertex program after source conflict resolve:\n");
|
||||
_mesa_print_program(compiler->program);
|
||||
rc_print_program(&compiler->Base.Program);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
rc_mesa_to_rc_program(&compiler->Base, compiler->program);
|
||||
|
||||
{
|
||||
struct radeon_nqssadce_descr nqssadce = {
|
||||
.Init = &nqssadceInit,
|
||||
|
|
|
|||
|
|
@ -29,152 +29,139 @@
|
|||
|
||||
#include "../r300_reg.h"
|
||||
|
||||
static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
|
||||
static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
|
||||
{
|
||||
gl_state_index fail_value_tokens[STATE_LENGTH] = {
|
||||
STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
|
||||
};
|
||||
struct prog_src_register reg = { 0, };
|
||||
|
||||
fail_value_tokens[2] = tmu;
|
||||
reg.File = PROGRAM_STATE_VAR;
|
||||
reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
|
||||
reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
|
||||
reg.Swizzle = SWIZZLE_WWWW;
|
||||
return reg;
|
||||
}
|
||||
|
||||
/**
|
||||
* Transform TEX, TXP, TXB, and KIL instructions in the following way:
|
||||
* - premultiply texture coordinates for RECT
|
||||
* - extract operand swizzles
|
||||
* - introduce a temporary register when write masks are needed
|
||||
*
|
||||
* - implement texture compare (shadow extensions)
|
||||
* - extract non-native source / destination operands
|
||||
*/
|
||||
GLboolean r500_transform_TEX(
|
||||
struct radeon_transform_context *t,
|
||||
struct prog_instruction* orig_inst, void* data)
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * inst,
|
||||
void* data)
|
||||
{
|
||||
struct r300_fragment_program_compiler *compiler =
|
||||
(struct r300_fragment_program_compiler*)data;
|
||||
struct prog_instruction inst = *orig_inst;
|
||||
struct prog_instruction* tgt;
|
||||
GLboolean destredirect = GL_FALSE;
|
||||
|
||||
if (inst.Opcode != OPCODE_TEX &&
|
||||
inst.Opcode != OPCODE_TXB &&
|
||||
inst.Opcode != OPCODE_TXP &&
|
||||
inst.Opcode != OPCODE_KIL)
|
||||
if (inst->I.Opcode != OPCODE_TEX &&
|
||||
inst->I.Opcode != OPCODE_TXB &&
|
||||
inst->I.Opcode != OPCODE_TXP &&
|
||||
inst->I.Opcode != OPCODE_KIL)
|
||||
return GL_FALSE;
|
||||
|
||||
/* ARB_shadow & EXT_shadow_funcs */
|
||||
if (inst.Opcode != OPCODE_KIL &&
|
||||
t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
|
||||
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
|
||||
if (inst->I.Opcode != OPCODE_KIL &&
|
||||
c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
|
||||
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
|
||||
|
||||
if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
|
||||
tgt = radeonAppendInstructions(t->Program, 1);
|
||||
inst->I.Opcode = OPCODE_MOV;
|
||||
|
||||
tgt->Opcode = OPCODE_MOV;
|
||||
tgt->DstReg = inst.DstReg;
|
||||
if (comparefunc == GL_ALWAYS) {
|
||||
tgt->SrcReg[0].File = PROGRAM_BUILTIN;
|
||||
tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
|
||||
inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
|
||||
inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
|
||||
} else {
|
||||
tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
|
||||
inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
|
||||
}
|
||||
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
inst.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst.DstReg.Index = radeonFindFreeTemporary(t);
|
||||
inst.DstReg.WriteMask = WRITEMASK_XYZW;
|
||||
} else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) {
|
||||
int tempreg = radeonFindFreeTemporary(t);
|
||||
|
||||
inst.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst.DstReg.Index = tempreg;
|
||||
inst.DstReg.WriteMask = WRITEMASK_XYZW;
|
||||
destredirect = GL_TRUE;
|
||||
}
|
||||
|
||||
if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
|
||||
int tmpreg = radeonFindFreeTemporary(t);
|
||||
tgt = radeonAppendInstructions(t->Program, 1);
|
||||
tgt->Opcode = OPCODE_MOV;
|
||||
tgt->DstReg.File = PROGRAM_TEMPORARY;
|
||||
tgt->DstReg.Index = tmpreg;
|
||||
tgt->SrcReg[0] = inst.SrcReg[0];
|
||||
|
||||
reset_srcreg(&inst.SrcReg[0]);
|
||||
inst.SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
inst.SrcReg[0].Index = tmpreg;
|
||||
}
|
||||
|
||||
tgt = radeonAppendInstructions(t->Program, 1);
|
||||
_mesa_copy_instructions(tgt, &inst, 1);
|
||||
|
||||
if (inst.Opcode != OPCODE_KIL &&
|
||||
t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
|
||||
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
|
||||
GLuint depthmode = compiler->state.unit[inst.TexSrcUnit].depth_texture_mode;
|
||||
int rcptemp = radeonFindFreeTemporary(t);
|
||||
int pass, fail;
|
||||
|
||||
tgt = radeonAppendInstructions(t->Program, 3);
|
||||
|
||||
tgt[0].Opcode = OPCODE_RCP;
|
||||
tgt[0].DstReg.File = PROGRAM_TEMPORARY;
|
||||
tgt[0].DstReg.Index = rcptemp;
|
||||
tgt[0].DstReg.WriteMask = WRITEMASK_W;
|
||||
tgt[0].SrcReg[0] = inst.SrcReg[0];
|
||||
tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
|
||||
|
||||
tgt[1].Opcode = OPCODE_MAD;
|
||||
tgt[1].DstReg = inst.DstReg;
|
||||
tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
|
||||
tgt[1].SrcReg[0] = inst.SrcReg[0];
|
||||
tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
|
||||
tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
|
||||
tgt[1].SrcReg[1].Index = rcptemp;
|
||||
tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
|
||||
tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
|
||||
tgt[1].SrcReg[2].Index = inst.DstReg.Index;
|
||||
if (depthmode == 0) /* GL_LUMINANCE */
|
||||
tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
|
||||
else if (depthmode == 2) /* GL_ALPHA */
|
||||
tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
|
||||
|
||||
/* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
|
||||
* r < tex <=> -tex+r < 0
|
||||
* r >= tex <=> not (-tex+r < 0 */
|
||||
if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
|
||||
tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
|
||||
else
|
||||
tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
|
||||
|
||||
tgt[2].Opcode = OPCODE_CMP;
|
||||
tgt[2].DstReg = orig_inst->DstReg;
|
||||
tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
|
||||
|
||||
if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
|
||||
pass = 1;
|
||||
fail = 2;
|
||||
} else {
|
||||
pass = 2;
|
||||
fail = 1;
|
||||
GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
|
||||
GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
|
||||
struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
|
||||
struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
|
||||
struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
|
||||
int pass, fail;
|
||||
|
||||
inst_rcp->I.Opcode = OPCODE_RCP;
|
||||
inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
|
||||
inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
|
||||
inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
|
||||
inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
|
||||
|
||||
inst_cmp->I.DstReg = inst->I.DstReg;
|
||||
inst->I.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst->I.DstReg.Index = rc_find_free_temporary(c);
|
||||
inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
|
||||
|
||||
inst_mad->I.Opcode = OPCODE_MAD;
|
||||
inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
|
||||
inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
|
||||
inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
|
||||
inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
|
||||
inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
|
||||
inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
|
||||
inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
|
||||
inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
|
||||
if (depthmode == 0) /* GL_LUMINANCE */
|
||||
inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
|
||||
else if (depthmode == 2) /* GL_ALPHA */
|
||||
inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;
|
||||
|
||||
/* Recall that SrcReg[0] is r, SrcReg[2] is tex and:
|
||||
* r < tex <=> -tex+r < 0
|
||||
* r >= tex <=> not (-tex+r < 0) */
|
||||
if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
|
||||
inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
|
||||
else
|
||||
inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;
|
||||
|
||||
inst_cmp->I.Opcode = OPCODE_CMP;
|
||||
/* DstReg has been filled out above */
|
||||
inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;
|
||||
|
||||
if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
|
||||
pass = 1;
|
||||
fail = 2;
|
||||
} else {
|
||||
pass = 2;
|
||||
fail = 1;
|
||||
}
|
||||
|
||||
inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
|
||||
inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
|
||||
inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
|
||||
}
|
||||
}
|
||||
|
||||
tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
|
||||
tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
|
||||
tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
|
||||
} else if (destredirect) {
|
||||
tgt = radeonAppendInstructions(t->Program, 1);
|
||||
/* Cannot write texture to output registers */
|
||||
if (inst->I.Opcode != OPCODE_KIL && inst->I.DstReg.File != PROGRAM_TEMPORARY) {
|
||||
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
|
||||
|
||||
tgt->Opcode = OPCODE_MOV;
|
||||
tgt->DstReg = orig_inst->DstReg;
|
||||
tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
tgt->SrcReg[0].Index = inst.DstReg.Index;
|
||||
inst_mov->I.Opcode = OPCODE_MOV;
|
||||
inst_mov->I.DstReg = inst->I.DstReg;
|
||||
inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);
|
||||
|
||||
inst->I.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
|
||||
inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
|
||||
}
|
||||
|
||||
/* Cannot read texture coordinate from constants file */
|
||||
if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
|
||||
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
|
||||
|
||||
inst_mov->I.Opcode = OPCODE_MOV;
|
||||
inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
|
||||
inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
|
||||
|
||||
reset_srcreg(&inst->I.SrcReg[0]);
|
||||
inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
|
||||
}
|
||||
|
||||
return GL_TRUE;
|
||||
|
|
|
|||
|
|
@ -47,6 +47,9 @@ extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register r
|
|||
|
||||
extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src);
|
||||
|
||||
extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data);
|
||||
extern GLboolean r500_transform_TEX(
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * inst,
|
||||
void* data);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -76,3 +76,95 @@ unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * cons
|
|||
|
||||
return index;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Add a state vector to the constant list, while trying to avoid duplicates.
|
||||
*/
|
||||
unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1)
|
||||
{
|
||||
unsigned index;
|
||||
struct rc_constant constant;
|
||||
|
||||
for(index = 0; index < c->Count; ++index) {
|
||||
if (c->Constants[index].Type == RC_CONSTANT_STATE) {
|
||||
if (c->Constants[index].u.State[0] == state0 &&
|
||||
c->Constants[index].u.State[1] == state1)
|
||||
return index;
|
||||
}
|
||||
}
|
||||
|
||||
memset(&constant, 0, sizeof(constant));
|
||||
constant.Type = RC_CONSTANT_STATE;
|
||||
constant.Size = 4;
|
||||
constant.u.State[0] = state0;
|
||||
constant.u.State[1] = state1;
|
||||
|
||||
return rc_constants_add(c, &constant);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Add an immediate vector to the constant list, while trying to avoid
|
||||
* duplicates.
|
||||
*/
|
||||
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data)
|
||||
{
|
||||
unsigned index;
|
||||
struct rc_constant constant;
|
||||
|
||||
for(index = 0; index < c->Count; ++index) {
|
||||
if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
|
||||
if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4))
|
||||
return index;
|
||||
}
|
||||
}
|
||||
|
||||
memset(&constant, 0, sizeof(constant));
|
||||
constant.Type = RC_CONSTANT_IMMEDIATE;
|
||||
constant.Size = 4;
|
||||
memcpy(constant.u.Immediate, data, sizeof(float) * 4);
|
||||
|
||||
return rc_constants_add(c, &constant);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Add an immediate scalar to the constant list, while trying to avoid
|
||||
* duplicates.
|
||||
*/
|
||||
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle)
|
||||
{
|
||||
unsigned index;
|
||||
int free_index = -1;
|
||||
struct rc_constant constant;
|
||||
|
||||
for(index = 0; index < c->Count; ++index) {
|
||||
if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
|
||||
for(unsigned comp = 0; comp < c->Constants[index].Size; ++comp) {
|
||||
if (c->Constants[index].u.Immediate[comp] == data) {
|
||||
*swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
|
||||
return index;
|
||||
}
|
||||
}
|
||||
|
||||
if (c->Constants[index].Size < 4)
|
||||
free_index = index;
|
||||
}
|
||||
}
|
||||
|
||||
if (free_index >= 0) {
|
||||
unsigned comp = c->Constants[free_index].Size++;
|
||||
c->Constants[free_index].u.Immediate[comp] = data;
|
||||
*swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
|
||||
return free_index;
|
||||
}
|
||||
|
||||
memset(&constant, 0, sizeof(constant));
|
||||
constant.Type = RC_CONSTANT_IMMEDIATE;
|
||||
constant.Size = 1;
|
||||
constant.u.Immediate[0] = data;
|
||||
*swizzle = SWIZZLE_XXXX;
|
||||
|
||||
return rc_constants_add(c, &constant);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,8 +35,6 @@
|
|||
|
||||
|
||||
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
|
||||
#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
|
||||
|
||||
|
||||
enum {
|
||||
/**
|
||||
|
|
@ -50,17 +48,26 @@ enum {
|
|||
|
||||
/**
|
||||
* Constant referring to state that is known by this compiler,
|
||||
* i.e. *not* arbitrary Mesa (or other) state.
|
||||
* see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
|
||||
*/
|
||||
RC_CONSTANT_STATE
|
||||
};
|
||||
|
||||
/**
 * Selectors for RC_CONSTANT_STATE constants (the RC_STATE_xxx values).
 *
 * A selector is passed as the first state word to rc_constants_add_state();
 * the meaning of the second state word depends on the selector.
 */
enum {
	/* Second state word is the texture unit whose value is referenced. */
	RC_STATE_SHADOW_AMBIENT = 0,

	/* NOTE(review): presumably supersede the STATE_R300_* internal driver
	 * state tokens of the same names; confirm against the users. */
	RC_STATE_R300_WINDOW_DIMENSION = 1,
	RC_STATE_R300_TEXRECT_FACTOR = 2
};
|
||||
|
||||
struct rc_constant {
|
||||
unsigned Type:2; /**< RC_CONSTANT_xxx */
|
||||
unsigned Size:3;
|
||||
|
||||
union {
|
||||
unsigned External;
|
||||
float Immediate[4];
|
||||
unsigned State[4];
|
||||
unsigned State[2];
|
||||
} u;
|
||||
};
|
||||
|
||||
|
|
@ -75,6 +82,9 @@ void rc_constants_init(struct rc_constant_list * c);
|
|||
void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
|
||||
void rc_constants_destroy(struct rc_constant_list * c);
|
||||
unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
|
||||
/* Returns the index of the (possibly pre-existing) state constant;
 * state0 and state1 are the two state words, named as in the definition. */
unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1);
|
||||
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
|
||||
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
|
||||
|
||||
/**
|
||||
* Stores state that influences the compilation of a fragment program.
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...);
|
|||
struct r300_fragment_program_compiler {
|
||||
struct radeon_compiler Base;
|
||||
struct rX00_fragment_program_code *code;
|
||||
struct gl_program *program;
|
||||
struct gl_program * program;
|
||||
struct r300_fragment_program_external_state state;
|
||||
GLboolean is_r500;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -48,83 +48,25 @@
|
|||
* one instruction at a time.
|
||||
*/
|
||||
void radeonLocalTransform(
|
||||
struct gl_program *program,
|
||||
struct radeon_compiler * c,
|
||||
int num_transformations,
|
||||
struct radeon_program_transformation* transformations)
|
||||
{
|
||||
struct radeon_transform_context ctx;
|
||||
int ip;
|
||||
struct rc_instruction * inst = c->Program.Instructions.Next;
|
||||
|
||||
ctx.Program = program;
|
||||
ctx.OldInstructions = program->Instructions;
|
||||
ctx.OldNumInstructions = program->NumInstructions;
|
||||
|
||||
program->Instructions = 0;
|
||||
program->NumInstructions = 0;
|
||||
|
||||
for(ip = 0; ip < ctx.OldNumInstructions; ++ip) {
|
||||
struct prog_instruction *instr = ctx.OldInstructions + ip;
|
||||
while(inst != &c->Program.Instructions) {
|
||||
struct rc_instruction * current = inst;
|
||||
int i;
|
||||
|
||||
inst = inst->Next;
|
||||
|
||||
for(i = 0; i < num_transformations; ++i) {
|
||||
struct radeon_program_transformation* t = transformations + i;
|
||||
|
||||
if (t->function(&ctx, instr, t->userData))
|
||||
if (t->function(c, current, t->userData))
|
||||
break;
|
||||
}
|
||||
|
||||
if (i >= num_transformations) {
|
||||
struct prog_instruction* dest = radeonAppendInstructions(program, 1);
|
||||
_mesa_copy_instructions(dest, instr, 1);
|
||||
}
|
||||
}
|
||||
|
||||
_mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Flag every temporary register that is read by the given instructions.
 *
 * For each of the \p count instructions, all source operands in the
 * PROGRAM_TEMPORARY file set used[Index] to GL_TRUE. Destination registers
 * are not examined, and entries of \p used are never cleared here.
 */
static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count)
{
	GLuint ip;

	for (ip = 0; ip < count; ++ip) {
		const struct prog_instruction *cur = &insts[ip];
		GLuint src = _mesa_num_inst_src_regs(cur->Opcode);

		/* Operand order is irrelevant; walk them from last to first. */
		while (src-- > 0) {
			if (cur->SrcReg[src].File != PROGRAM_TEMPORARY)
				continue;
			used[cur->SrcReg[src].Index] = GL_TRUE;
		}
	}
}
|
||||
|
||||
GLint radeonFindFreeTemporary(struct radeon_transform_context *t)
|
||||
{
|
||||
GLboolean used[MAX_PROGRAM_TEMPS];
|
||||
GLuint i;
|
||||
|
||||
_mesa_memset(used, 0, sizeof(used));
|
||||
scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions);
|
||||
scan_instructions(used, t->OldInstructions, t->OldNumInstructions);
|
||||
|
||||
for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
|
||||
if (!used[i])
|
||||
return i;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Append the given number of instructions to the program and return a
|
||||
* pointer to the first new instruction.
|
||||
*/
|
||||
struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count)
|
||||
{
|
||||
int oldnum = program->NumInstructions;
|
||||
_mesa_insert_instructions(program, oldnum, count);
|
||||
return program->Instructions + oldnum;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -206,6 +148,7 @@ void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * progr
|
|||
struct rc_constant constant;
|
||||
|
||||
constant.Type = RC_CONSTANT_EXTERNAL;
|
||||
constant.Size = 4;
|
||||
constant.u.External = i;
|
||||
|
||||
rc_constants_add(&c->Program.Constants, &constant);
|
||||
|
|
|
|||
|
|
@ -87,18 +87,6 @@ static INLINE void reset_srcreg(struct prog_src_register* reg)
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Transformation context that is passed to local transformations.
|
||||
*
|
||||
* Care must be taken with some operations during transformation,
|
||||
* e.g. finding new temporary registers must use @ref radeonFindFreeTemporary
|
||||
*/
|
||||
struct radeon_transform_context {
|
||||
struct gl_program *Program;
|
||||
struct prog_instruction *OldInstructions;
|
||||
GLuint OldNumInstructions;
|
||||
};
|
||||
|
||||
/**
|
||||
* A transformation that can be passed to \ref radeonLocalTransform.
|
||||
*
|
||||
|
|
@ -111,24 +99,17 @@ struct radeon_transform_context {
|
|||
*/
|
||||
struct radeon_program_transformation {
|
||||
GLboolean (*function)(
|
||||
struct radeon_transform_context*,
|
||||
struct prog_instruction*,
|
||||
struct radeon_compiler*,
|
||||
struct rc_instruction*,
|
||||
void*);
|
||||
void *userData;
|
||||
};
|
||||
|
||||
void radeonLocalTransform(
|
||||
struct gl_program *program,
|
||||
struct radeon_compiler *c,
|
||||
int num_transformations,
|
||||
struct radeon_program_transformation* transformations);
|
||||
|
||||
/**
|
||||
* Find a usable free temporary register during program transformation
|
||||
*/
|
||||
GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx);
|
||||
|
||||
struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count);
|
||||
|
||||
GLint rc_find_free_temporary(struct radeon_compiler * c);
|
||||
|
||||
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
|
||||
|
|
|
|||
|
|
@ -35,49 +35,52 @@
|
|||
|
||||
#include "radeon_program_alu.h"
|
||||
|
||||
#include "shader/prog_parameter.h"
|
||||
#include "radeon_compiler.h"
|
||||
|
||||
|
||||
static struct prog_instruction *emit1(struct gl_program* p,
|
||||
static struct rc_instruction *emit1(
|
||||
struct radeon_compiler * c, struct rc_instruction * after,
|
||||
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
|
||||
struct prog_src_register SrcReg)
|
||||
{
|
||||
struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
|
||||
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
|
||||
|
||||
fpi->Opcode = Opcode;
|
||||
fpi->SaturateMode = Saturate;
|
||||
fpi->DstReg = DstReg;
|
||||
fpi->SrcReg[0] = SrcReg;
|
||||
fpi->I.Opcode = Opcode;
|
||||
fpi->I.SaturateMode = Saturate;
|
||||
fpi->I.DstReg = DstReg;
|
||||
fpi->I.SrcReg[0] = SrcReg;
|
||||
return fpi;
|
||||
}
|
||||
|
||||
static struct prog_instruction *emit2(struct gl_program* p,
|
||||
static struct rc_instruction *emit2(
|
||||
struct radeon_compiler * c, struct rc_instruction * after,
|
||||
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
|
||||
struct prog_src_register SrcReg0, struct prog_src_register SrcReg1)
|
||||
{
|
||||
struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
|
||||
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
|
||||
|
||||
fpi->Opcode = Opcode;
|
||||
fpi->SaturateMode = Saturate;
|
||||
fpi->DstReg = DstReg;
|
||||
fpi->SrcReg[0] = SrcReg0;
|
||||
fpi->SrcReg[1] = SrcReg1;
|
||||
fpi->I.Opcode = Opcode;
|
||||
fpi->I.SaturateMode = Saturate;
|
||||
fpi->I.DstReg = DstReg;
|
||||
fpi->I.SrcReg[0] = SrcReg0;
|
||||
fpi->I.SrcReg[1] = SrcReg1;
|
||||
return fpi;
|
||||
}
|
||||
|
||||
static struct prog_instruction *emit3(struct gl_program* p,
|
||||
static struct rc_instruction *emit3(
|
||||
struct radeon_compiler * c, struct rc_instruction * after,
|
||||
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
|
||||
struct prog_src_register SrcReg0, struct prog_src_register SrcReg1,
|
||||
struct prog_src_register SrcReg2)
|
||||
{
|
||||
struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
|
||||
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
|
||||
|
||||
fpi->Opcode = Opcode;
|
||||
fpi->SaturateMode = Saturate;
|
||||
fpi->DstReg = DstReg;
|
||||
fpi->SrcReg[0] = SrcReg0;
|
||||
fpi->SrcReg[1] = SrcReg1;
|
||||
fpi->SrcReg[2] = SrcReg2;
|
||||
fpi->I.Opcode = Opcode;
|
||||
fpi->I.SaturateMode = Saturate;
|
||||
fpi->I.DstReg = DstReg;
|
||||
fpi->I.SrcReg[0] = SrcReg0;
|
||||
fpi->I.SrcReg[1] = SrcReg1;
|
||||
fpi->I.SrcReg[2] = SrcReg2;
|
||||
return fpi;
|
||||
}
|
||||
|
||||
|
|
@ -171,58 +174,63 @@ static struct prog_src_register scalar(struct prog_src_register reg)
|
|||
return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
|
||||
}
|
||||
|
||||
static void transform_ABS(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst)
|
||||
static void transform_ABS(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
{
|
||||
struct prog_src_register src = inst->SrcReg[0];
|
||||
struct prog_src_register src = inst->I.SrcReg[0];
|
||||
src.Abs = 1;
|
||||
src.Negate = NEGATE_NONE;
|
||||
emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src);
|
||||
emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src);
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
static void transform_DP3(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst)
|
||||
static void transform_DP3(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
{
|
||||
struct prog_src_register src0 = inst->SrcReg[0];
|
||||
struct prog_src_register src1 = inst->SrcReg[1];
|
||||
struct prog_src_register src0 = inst->I.SrcReg[0];
|
||||
struct prog_src_register src1 = inst->I.SrcReg[1];
|
||||
src0.Negate &= ~NEGATE_W;
|
||||
src0.Swizzle &= ~(7 << (3 * 3));
|
||||
src0.Swizzle |= SWIZZLE_ZERO << (3 * 3);
|
||||
src1.Negate &= ~NEGATE_W;
|
||||
src1.Swizzle &= ~(7 << (3 * 3));
|
||||
src1.Swizzle |= SWIZZLE_ZERO << (3 * 3);
|
||||
emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, src1);
|
||||
emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1);
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
static void transform_DPH(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst)
|
||||
static void transform_DPH(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
{
|
||||
struct prog_src_register src0 = inst->SrcReg[0];
|
||||
struct prog_src_register src0 = inst->I.SrcReg[0];
|
||||
src0.Negate &= ~NEGATE_W;
|
||||
src0.Swizzle &= ~(7 << (3 * 3));
|
||||
src0.Swizzle |= SWIZZLE_ONE << (3 * 3);
|
||||
emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]);
|
||||
emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]);
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
/**
|
||||
* [1, src0.y*src1.y, src0.z, src1.w]
|
||||
* So basically MUL with lotsa swizzling.
|
||||
*/
|
||||
static void transform_DST(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst)
|
||||
static void transform_DST(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
{
|
||||
emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg,
|
||||
swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
|
||||
swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
|
||||
emit2(c, inst->Prev, OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg,
|
||||
swizzle(inst->I.SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
|
||||
swizzle(inst->I.SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
static void transform_FLR(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst)
|
||||
static void transform_FLR(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
{
|
||||
int tempreg = radeonFindFreeTemporary(t);
|
||||
emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]);
|
||||
emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg,
|
||||
inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
|
||||
int tempreg = rc_find_free_temporary(c);
|
||||
emit1(c, inst->Prev, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0]);
|
||||
emit2(c, inst->Prev, OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg,
|
||||
inst->I.SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -243,152 +251,159 @@ static void transform_FLR(struct radeon_transform_context* t,
|
|||
* 5 slots, if the subsequent optimization passes are clever enough
|
||||
* to pair instructions correctly.
|
||||
*/
|
||||
static void transform_LIT(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst)
|
||||
static void transform_LIT(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
{
|
||||
static const GLfloat LitConst[4] = { -127.999999 };
|
||||
|
||||
GLuint constant;
|
||||
GLuint constant_swizzle;
|
||||
GLuint temp;
|
||||
int needTemporary = 0;
|
||||
struct prog_src_register srctemp;
|
||||
|
||||
constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle);
|
||||
constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);
|
||||
|
||||
if (inst->DstReg.WriteMask != WRITEMASK_XYZW) {
|
||||
needTemporary = 1;
|
||||
} else if (inst->DstReg.File != PROGRAM_TEMPORARY) {
|
||||
// LIT is typically followed by DP3/DP4, so there's no point
|
||||
// in creating special code for this case
|
||||
needTemporary = 1;
|
||||
if (inst->I.DstReg.WriteMask != WRITEMASK_XYZW || inst->I.DstReg.File != PROGRAM_TEMPORARY) {
|
||||
struct rc_instruction * inst_mov;
|
||||
|
||||
inst_mov = emit1(c, inst,
|
||||
OPCODE_MOV, 0, inst->I.DstReg,
|
||||
srcreg(PROGRAM_TEMPORARY, rc_find_free_temporary(c)));
|
||||
|
||||
inst->I.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
|
||||
inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
|
||||
}
|
||||
|
||||
if (needTemporary) {
|
||||
temp = radeonFindFreeTemporary(t);
|
||||
} else {
|
||||
temp = inst->DstReg.Index;
|
||||
}
|
||||
temp = inst->I.DstReg.Index;
|
||||
srctemp = srcreg(PROGRAM_TEMPORARY, temp);
|
||||
|
||||
// tmp.x = max(0.0, Src.x);
|
||||
// tmp.y = max(0.0, Src.y);
|
||||
// tmp.w = clamp(Src.z, -128+eps, 128-eps);
|
||||
emit2(t->Program, OPCODE_MAX, 0,
|
||||
emit2(c, inst->Prev, OPCODE_MAX, 0,
|
||||
dstregtmpmask(temp, WRITEMASK_XYW),
|
||||
inst->SrcReg[0],
|
||||
inst->I.SrcReg[0],
|
||||
swizzle(srcreg(PROGRAM_CONSTANT, constant),
|
||||
SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3));
|
||||
emit2(t->Program, OPCODE_MIN, 0,
|
||||
emit2(c, inst->Prev, OPCODE_MIN, 0,
|
||||
dstregtmpmask(temp, WRITEMASK_Z),
|
||||
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
|
||||
negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)));
|
||||
|
||||
// tmp.w = Pow(tmp.y, tmp.w)
|
||||
emit1(t->Program, OPCODE_LG2, 0,
|
||||
emit1(c, inst->Prev, OPCODE_LG2, 0,
|
||||
dstregtmpmask(temp, WRITEMASK_W),
|
||||
swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
|
||||
emit2(t->Program, OPCODE_MUL, 0,
|
||||
emit2(c, inst->Prev, OPCODE_MUL, 0,
|
||||
dstregtmpmask(temp, WRITEMASK_W),
|
||||
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
|
||||
swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z));
|
||||
emit1(t->Program, OPCODE_EX2, 0,
|
||||
emit1(c, inst->Prev, OPCODE_EX2, 0,
|
||||
dstregtmpmask(temp, WRITEMASK_W),
|
||||
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
|
||||
|
||||
// tmp.z = (tmp.x > 0) ? tmp.w : 0.0
|
||||
emit3(t->Program, OPCODE_CMP, inst->SaturateMode,
|
||||
emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode,
|
||||
dstregtmpmask(temp, WRITEMASK_Z),
|
||||
negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
|
||||
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
|
||||
builtin_zero);
|
||||
|
||||
// tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
|
||||
emit1(t->Program, OPCODE_MOV, inst->SaturateMode,
|
||||
emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode,
|
||||
dstregtmpmask(temp, WRITEMASK_XYW),
|
||||
swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE));
|
||||
|
||||
if (needTemporary)
|
||||
emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp);
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
static void transform_LRP(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst)
|
||||
static void transform_LRP(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
{
|
||||
int tempreg = radeonFindFreeTemporary(t);
|
||||
int tempreg = rc_find_free_temporary(c);
|
||||
|
||||
emit2(t->Program, OPCODE_ADD, 0,
|
||||
emit2(c, inst->Prev, OPCODE_ADD, 0,
|
||||
dstreg(PROGRAM_TEMPORARY, tempreg),
|
||||
inst->SrcReg[1], negate(inst->SrcReg[2]));
|
||||
emit3(t->Program, OPCODE_MAD, inst->SaturateMode,
|
||||
inst->DstReg,
|
||||
inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]);
|
||||
inst->I.SrcReg[1], negate(inst->I.SrcReg[2]));
|
||||
emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode,
|
||||
inst->I.DstReg,
|
||||
inst->I.SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[2]);
|
||||
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
static void transform_POW(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst)
|
||||
static void transform_POW(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
{
|
||||
int tempreg = radeonFindFreeTemporary(t);
|
||||
int tempreg = rc_find_free_temporary(c);
|
||||
struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);
|
||||
struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);
|
||||
tempdst.WriteMask = WRITEMASK_W;
|
||||
tempsrc.Swizzle = SWIZZLE_WWWW;
|
||||
|
||||
emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0]));
|
||||
emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1]));
|
||||
emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc);
|
||||
emit1(c, inst->Prev, OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0]));
|
||||
emit2(c, inst->Prev, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1]));
|
||||
emit1(c, inst->Prev, OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc);
|
||||
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
static void transform_RSQ(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst)
|
||||
static void transform_RSQ(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
{
|
||||
emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0]));
|
||||
inst->I.SrcReg[0] = absolute(inst->I.SrcReg[0]);
|
||||
}
|
||||
|
||||
static void transform_SGE(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst)
|
||||
static void transform_SGE(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
{
|
||||
int tempreg = radeonFindFreeTemporary(t);
|
||||
int tempreg = rc_find_free_temporary(c);
|
||||
|
||||
emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
|
||||
emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
|
||||
emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1]));
|
||||
emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg,
|
||||
srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
|
||||
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
static void transform_SLT(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst)
|
||||
static void transform_SLT(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
{
|
||||
int tempreg = radeonFindFreeTemporary(t);
|
||||
int tempreg = rc_find_free_temporary(c);
|
||||
|
||||
emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
|
||||
emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
|
||||
emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1]));
|
||||
emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg,
|
||||
srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
|
||||
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
static void transform_SUB(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst)
|
||||
static void transform_SUB(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
{
|
||||
emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));
|
||||
inst->I.Opcode = OPCODE_ADD;
|
||||
inst->I.SrcReg[1] = negate(inst->I.SrcReg[1]);
|
||||
}
|
||||
|
||||
static void transform_SWZ(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst)
|
||||
static void transform_SWZ(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
{
|
||||
emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]);
|
||||
inst->I.Opcode = OPCODE_MOV;
|
||||
}
|
||||
|
||||
static void transform_XPD(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst)
|
||||
static void transform_XPD(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
{
|
||||
int tempreg = radeonFindFreeTemporary(t);
|
||||
int tempreg = rc_find_free_temporary(c);
|
||||
|
||||
emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
|
||||
swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
|
||||
swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
|
||||
emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg,
|
||||
swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
|
||||
swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
|
||||
emit2(c, inst->Prev, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
|
||||
swizzle(inst->I.SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
|
||||
swizzle(inst->I.SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
|
||||
emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg,
|
||||
swizzle(inst->I.SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
|
||||
swizzle(inst->I.SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
|
||||
negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
|
||||
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -406,63 +421,64 @@ static void transform_XPD(struct radeon_transform_context* t,
|
|||
*
|
||||
* @note should be applicable to R300 and R500 fragment programs.
|
||||
*/
|
||||
GLboolean radeonTransformALU(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst,
|
||||
GLboolean radeonTransformALU(
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction* inst,
|
||||
void* unused)
|
||||
{
|
||||
switch(inst->Opcode) {
|
||||
case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;
|
||||
case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
|
||||
case OPCODE_DST: transform_DST(t, inst); return GL_TRUE;
|
||||
case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
|
||||
case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE;
|
||||
case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;
|
||||
case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;
|
||||
case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE;
|
||||
case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;
|
||||
case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;
|
||||
case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
|
||||
case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
|
||||
case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
|
||||
switch(inst->I.Opcode) {
|
||||
case OPCODE_ABS: transform_ABS(c, inst); return GL_TRUE;
|
||||
case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE;
|
||||
case OPCODE_DST: transform_DST(c, inst); return GL_TRUE;
|
||||
case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE;
|
||||
case OPCODE_LIT: transform_LIT(c, inst); return GL_TRUE;
|
||||
case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE;
|
||||
case OPCODE_POW: transform_POW(c, inst); return GL_TRUE;
|
||||
case OPCODE_RSQ: transform_RSQ(c, inst); return GL_TRUE;
|
||||
case OPCODE_SGE: transform_SGE(c, inst); return GL_TRUE;
|
||||
case OPCODE_SLT: transform_SLT(c, inst); return GL_TRUE;
|
||||
case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE;
|
||||
case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE;
|
||||
case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE;
|
||||
default:
|
||||
return GL_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void transform_r300_vertex_ABS(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst)
|
||||
static void transform_r300_vertex_ABS(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
{
|
||||
/* Note: r500 can take absolute values, but r300 cannot. */
|
||||
struct prog_src_register src1 = inst->SrcReg[0];
|
||||
src1.Negate ^= NEGATE_XYZW;
|
||||
|
||||
emit2(t->Program, OPCODE_MAX, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], src1);
|
||||
inst->I.Opcode = OPCODE_MAX;
|
||||
inst->I.SrcReg[1] = inst->I.SrcReg[0];
|
||||
inst->I.SrcReg[1].Negate ^= NEGATE_XYZW;
|
||||
}
|
||||
|
||||
/**
|
||||
* For use with radeonLocalTransform, this transforms non-native ALU
|
||||
* instructions of the r300 up to r500 vertex engine.
|
||||
*/
|
||||
GLboolean r300_transform_vertex_alu(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst,
|
||||
GLboolean r300_transform_vertex_alu(
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction* inst,
|
||||
void* unused)
|
||||
{
|
||||
switch(inst->Opcode) {
|
||||
case OPCODE_ABS: transform_r300_vertex_ABS(t, inst); return GL_TRUE;
|
||||
case OPCODE_DP3: transform_DP3(t, inst); return GL_TRUE;
|
||||
case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
|
||||
case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
|
||||
case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;
|
||||
case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
|
||||
case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
|
||||
case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
|
||||
switch(inst->I.Opcode) {
|
||||
case OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return GL_TRUE;
|
||||
case OPCODE_DP3: transform_DP3(c, inst); return GL_TRUE;
|
||||
case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE;
|
||||
case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE;
|
||||
case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE;
|
||||
case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE;
|
||||
case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE;
|
||||
case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE;
|
||||
default:
|
||||
return GL_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
static void sincos_constants(struct radeon_transform_context* t, GLuint *constants)
|
||||
static void sincos_constants(struct radeon_compiler* c, GLuint *constants)
|
||||
{
|
||||
static const GLfloat SinCosConsts[2][4] = {
|
||||
{
|
||||
|
|
@ -480,11 +496,8 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan
|
|||
};
|
||||
int i;
|
||||
|
||||
for(i = 0; i < 2; ++i) {
|
||||
GLuint swz;
|
||||
constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz);
|
||||
ASSERT(swz == SWIZZLE_NOOP);
|
||||
}
|
||||
for(i = 0; i < 2; ++i)
|
||||
constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -495,23 +508,24 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan
|
|||
* MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
|
||||
* MAD dest, tmp.y, weight, tmp.x
|
||||
*/
|
||||
static void sin_approx(struct radeon_transform_context* t,
|
||||
static void sin_approx(
|
||||
struct radeon_compiler* c, struct rc_instruction * after,
|
||||
struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants)
|
||||
{
|
||||
GLuint tempreg = radeonFindFreeTemporary(t);
|
||||
GLuint tempreg = rc_find_free_temporary(c);
|
||||
|
||||
emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
|
||||
emit2(c, after->Prev, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
|
||||
swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
|
||||
srcreg(PROGRAM_CONSTANT, constants[0]));
|
||||
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
|
||||
emit3(c, after->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
|
||||
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
|
||||
absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
|
||||
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
|
||||
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
|
||||
emit3(c, after->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
|
||||
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
|
||||
absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
|
||||
negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));
|
||||
emit3(t->Program, OPCODE_MAD, 0, dst,
|
||||
emit3(c, after->Prev, OPCODE_MAD, 0, dst,
|
||||
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
|
||||
swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
|
||||
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
|
||||
|
|
@ -522,78 +536,80 @@ static void sin_approx(struct radeon_transform_context* t,
|
|||
* using only the basic instructions
|
||||
* MOV, ADD, MUL, MAD, FRC
|
||||
*/
|
||||
GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst,
|
||||
GLboolean radeonTransformTrigSimple(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst,
|
||||
void* unused)
|
||||
{
|
||||
if (inst->Opcode != OPCODE_COS &&
|
||||
inst->Opcode != OPCODE_SIN &&
|
||||
inst->Opcode != OPCODE_SCS)
|
||||
if (inst->I.Opcode != OPCODE_COS &&
|
||||
inst->I.Opcode != OPCODE_SIN &&
|
||||
inst->I.Opcode != OPCODE_SCS)
|
||||
return GL_FALSE;
|
||||
|
||||
GLuint constants[2];
|
||||
GLuint tempreg = radeonFindFreeTemporary(t);
|
||||
GLuint tempreg = rc_find_free_temporary(c);
|
||||
|
||||
sincos_constants(t, constants);
|
||||
sincos_constants(c, constants);
|
||||
|
||||
if (inst->Opcode == OPCODE_COS) {
|
||||
if (inst->I.Opcode == OPCODE_COS) {
|
||||
// MAD tmp.x, src, 1/(2*PI), 0.75
|
||||
// FRC tmp.x, tmp.x
|
||||
// MAD tmp.z, tmp.x, 2*PI, -PI
|
||||
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
|
||||
swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
|
||||
emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
|
||||
swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
|
||||
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
|
||||
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
|
||||
emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
|
||||
emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
|
||||
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
|
||||
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
|
||||
emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
|
||||
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
|
||||
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
|
||||
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
|
||||
|
||||
sin_approx(t, inst->DstReg,
|
||||
sin_approx(c, inst->Prev, inst->I.DstReg,
|
||||
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
|
||||
constants);
|
||||
} else if (inst->Opcode == OPCODE_SIN) {
|
||||
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
|
||||
swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
|
||||
} else if (inst->I.Opcode == OPCODE_SIN) {
|
||||
emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
|
||||
swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
|
||||
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
|
||||
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
|
||||
emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
|
||||
emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
|
||||
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
|
||||
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
|
||||
emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
|
||||
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
|
||||
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
|
||||
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
|
||||
|
||||
sin_approx(t, inst->DstReg,
|
||||
sin_approx(c, inst->Prev, inst->I.DstReg,
|
||||
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
|
||||
constants);
|
||||
} else {
|
||||
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
|
||||
swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
|
||||
emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
|
||||
swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
|
||||
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
|
||||
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));
|
||||
emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
|
||||
emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
|
||||
srcreg(PROGRAM_TEMPORARY, tempreg));
|
||||
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
|
||||
emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
|
||||
srcreg(PROGRAM_TEMPORARY, tempreg),
|
||||
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
|
||||
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
|
||||
|
||||
struct prog_dst_register dst = inst->DstReg;
|
||||
struct prog_dst_register dst = inst->I.DstReg;
|
||||
|
||||
dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X;
|
||||
sin_approx(t, dst,
|
||||
dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_X;
|
||||
sin_approx(c, inst->Prev, dst,
|
||||
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
|
||||
constants);
|
||||
|
||||
dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y;
|
||||
sin_approx(t, dst,
|
||||
dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_Y;
|
||||
sin_approx(c, inst->Prev, dst,
|
||||
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
|
||||
constants);
|
||||
}
|
||||
|
||||
rc_remove_instruction(inst);
|
||||
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
|
|
@ -606,50 +622,52 @@ GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
|
|||
*
|
||||
* @warning This transformation implicitly changes the semantics of SIN and COS!
|
||||
*/
|
||||
GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst,
|
||||
GLboolean radeonTransformTrigScale(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst,
|
||||
void* unused)
|
||||
{
|
||||
if (inst->Opcode != OPCODE_COS &&
|
||||
inst->Opcode != OPCODE_SIN &&
|
||||
inst->Opcode != OPCODE_SCS)
|
||||
if (inst->I.Opcode != OPCODE_COS &&
|
||||
inst->I.Opcode != OPCODE_SIN &&
|
||||
inst->I.Opcode != OPCODE_SCS)
|
||||
return GL_FALSE;
|
||||
|
||||
static const GLfloat RCP_2PI[] = { 0.15915494309189535 };
|
||||
static const GLfloat RCP_2PI = 0.15915494309189535;
|
||||
GLuint temp;
|
||||
GLuint constant;
|
||||
GLuint constant_swizzle;
|
||||
|
||||
temp = radeonFindFreeTemporary(t);
|
||||
constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle);
|
||||
temp = rc_find_free_temporary(c);
|
||||
constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
|
||||
|
||||
emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
|
||||
swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
|
||||
emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
|
||||
swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
|
||||
srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle));
|
||||
emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
|
||||
emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
|
||||
srcreg(PROGRAM_TEMPORARY, temp));
|
||||
|
||||
if (inst->Opcode == OPCODE_COS) {
|
||||
emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg,
|
||||
if (inst->I.Opcode == OPCODE_COS) {
|
||||
emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg,
|
||||
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
|
||||
} else if (inst->Opcode == OPCODE_SIN) {
|
||||
emit1(t->Program, OPCODE_SIN, inst->SaturateMode,
|
||||
inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
|
||||
} else if (inst->Opcode == OPCODE_SCS) {
|
||||
struct prog_dst_register moddst = inst->DstReg;
|
||||
} else if (inst->I.Opcode == OPCODE_SIN) {
|
||||
emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode,
|
||||
inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
|
||||
} else if (inst->I.Opcode == OPCODE_SCS) {
|
||||
struct prog_dst_register moddst = inst->I.DstReg;
|
||||
|
||||
if (inst->DstReg.WriteMask & WRITEMASK_X) {
|
||||
if (inst->I.DstReg.WriteMask & WRITEMASK_X) {
|
||||
moddst.WriteMask = WRITEMASK_X;
|
||||
emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst,
|
||||
emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, moddst,
|
||||
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
|
||||
}
|
||||
if (inst->DstReg.WriteMask & WRITEMASK_Y) {
|
||||
if (inst->I.DstReg.WriteMask & WRITEMASK_Y) {
|
||||
moddst.WriteMask = WRITEMASK_Y;
|
||||
emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst,
|
||||
emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, moddst,
|
||||
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
|
||||
}
|
||||
}
|
||||
|
||||
rc_remove_instruction(inst);
|
||||
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
|
|
@ -661,21 +679,15 @@ GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
|
|||
* @warning This explicitly changes the form of DDX and DDY!
|
||||
*/
|
||||
|
||||
GLboolean radeonTransformDeriv(struct radeon_transform_context* t,
|
||||
struct prog_instruction* inst,
|
||||
GLboolean radeonTransformDeriv(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst,
|
||||
void* unused)
|
||||
{
|
||||
if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY)
|
||||
if (inst->I.Opcode != OPCODE_DDX && inst->I.Opcode != OPCODE_DDY)
|
||||
return GL_FALSE;
|
||||
|
||||
struct prog_src_register B = inst->SrcReg[1];
|
||||
|
||||
B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE,
|
||||
SWIZZLE_ONE, SWIZZLE_ONE);
|
||||
B.Negate = NEGATE_XYZW;
|
||||
|
||||
emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg,
|
||||
inst->SrcReg[0], B);
|
||||
inst->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE);
|
||||
inst->I.SrcReg[1].Negate = NEGATE_XYZW;
|
||||
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,28 +31,28 @@
|
|||
#include "radeon_program.h"
|
||||
|
||||
GLboolean radeonTransformALU(
|
||||
struct radeon_transform_context *t,
|
||||
struct prog_instruction*,
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * inst,
|
||||
void*);
|
||||
|
||||
GLboolean r300_transform_vertex_alu(
|
||||
struct radeon_transform_context *t,
|
||||
struct prog_instruction*,
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * inst,
|
||||
void*);
|
||||
|
||||
GLboolean radeonTransformTrigSimple(
|
||||
struct radeon_transform_context *t,
|
||||
struct prog_instruction*,
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * inst,
|
||||
void*);
|
||||
|
||||
GLboolean radeonTransformTrigScale(
|
||||
struct radeon_transform_context *t,
|
||||
struct prog_instruction*,
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * inst,
|
||||
void*);
|
||||
|
||||
GLboolean radeonTransformDeriv(
|
||||
struct radeon_transform_context *t,
|
||||
struct prog_instruction*,
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * inst,
|
||||
void*);
|
||||
|
||||
#endif /* __RADEON_PROGRAM_ALU_H_ */
|
||||
|
|
|
|||
|
|
@ -1063,24 +1063,6 @@ r300FetchStateParameter(GLcontext * ctx,
|
|||
break;
|
||||
}
|
||||
|
||||
case STATE_R300_TEXRECT_FACTOR:{
|
||||
struct gl_texture_object *t =
|
||||
ctx->Texture.Unit[state[2]].CurrentTex[TEXTURE_RECT_INDEX];
|
||||
|
||||
if (t && t->Image[0][t->BaseLevel]) {
|
||||
struct gl_texture_image *image =
|
||||
t->Image[0][t->BaseLevel];
|
||||
value[0] = 1.0 / image->Width2;
|
||||
value[1] = 1.0 / image->Height2;
|
||||
} else {
|
||||
value[0] = 1.0;
|
||||
value[1] = 1.0;
|
||||
}
|
||||
value[2] = 1.0;
|
||||
value[3] = 1.0;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -2029,7 +2011,7 @@ void r300UpdateShaders(r300ContextPtr rmesa)
|
|||
rmesa->radeon.NewGLState = 0;
|
||||
}
|
||||
|
||||
static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index)
|
||||
static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index, GLfloat * buffer)
|
||||
{
|
||||
static const GLfloat dummy[4] = { 0, 0, 0, 0 };
|
||||
r300ContextPtr rmesa = R300_CONTEXT(ctx);
|
||||
|
|
@ -2041,6 +2023,47 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index)
|
|||
return fp->Base->Parameters->ParameterValues[rcc->u.External];
|
||||
case RC_CONSTANT_IMMEDIATE:
|
||||
return rcc->u.Immediate;
|
||||
case RC_CONSTANT_STATE:
|
||||
switch(rcc->u.State[0]) {
|
||||
case RC_STATE_SHADOW_AMBIENT: {
|
||||
const int unit = (int) rcc->u.State[1];
|
||||
const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
|
||||
if (texObj) {
|
||||
buffer[0] =
|
||||
buffer[1] =
|
||||
buffer[2] =
|
||||
buffer[3] = texObj->CompareFailValue;
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
case RC_STATE_R300_WINDOW_DIMENSION: {
|
||||
__DRIdrawablePrivate * drawable = radeon_get_drawable(&rmesa->radeon);
|
||||
buffer[0] = drawable->w * 0.5f; /* width*0.5 */
|
||||
buffer[1] = drawable->h * 0.5f; /* height*0.5 */
|
||||
buffer[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */
|
||||
buffer[3] = 1.0F; /* not used */
|
||||
return buffer;
|
||||
}
|
||||
|
||||
case RC_STATE_R300_TEXRECT_FACTOR: {
|
||||
struct gl_texture_object *t =
|
||||
ctx->Texture.Unit[rcc->u.State[1]].CurrentTex[TEXTURE_RECT_INDEX];
|
||||
|
||||
if (t && t->Image[0][t->BaseLevel]) {
|
||||
struct gl_texture_image *image =
|
||||
t->Image[0][t->BaseLevel];
|
||||
buffer[0] = 1.0 / image->Width2;
|
||||
buffer[1] = 1.0 / image->Height2;
|
||||
} else {
|
||||
buffer[0] = 1.0;
|
||||
buffer[1] = 1.0;
|
||||
}
|
||||
buffer[2] = 1.0;
|
||||
buffer[3] = 1.0;
|
||||
return buffer;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return dummy;
|
||||
|
|
@ -2096,7 +2119,8 @@ static void r300SetupPixelShader(GLcontext *ctx)
|
|||
R300_STATECHANGE(rmesa, fpp);
|
||||
rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, fp->code.constants.Count * 4);
|
||||
for (i = 0; i < fp->code.constants.Count; i++) {
|
||||
const GLfloat *constant = get_fragmentprogram_constant(ctx, i);
|
||||
GLfloat buffer[4];
|
||||
const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer);
|
||||
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]);
|
||||
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]);
|
||||
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]);
|
||||
|
|
@ -2157,7 +2181,8 @@ static void r500SetupPixelShader(GLcontext *ctx)
|
|||
|
||||
R300_STATECHANGE(rmesa, r500fp_const);
|
||||
for (i = 0; i < fp->code.constants.Count; i++) {
|
||||
const GLfloat *constant = get_fragmentprogram_constant(ctx, i);
|
||||
GLfloat buffer[4];
|
||||
const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer);
|
||||
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]);
|
||||
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]);
|
||||
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue