r500: Handle non-native swizzles in texture instructions

This fixes piglit's fp-kil and fp-generic/kil-swizzle tests.
This commit is contained in:
Nicolai Haehnle 2008-07-27 16:36:05 +02:00
parent 1bdf5e09a0
commit 0973d348d7
2 changed files with 79 additions and 26 deletions

View file

@ -269,44 +269,87 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
GLuint relevant;
int i;
if (reg.Abs)
if (opcode == OPCODE_TEX ||
opcode == OPCODE_TXB ||
opcode == OPCODE_TXP ||
opcode == OPCODE_KIL) {
if (reg.Abs)
return GL_FALSE;
if (reg.NegateAbs)
reg.NegateBase ^= 15;
if (opcode == OPCODE_KIL) {
if (reg.Swizzle != SWIZZLE_NOOP)
return GL_FALSE;
} else {
for(i = 0; i < 4; ++i) {
GLuint swz = GET_SWZ(reg.Swizzle, i);
if (swz == SWIZZLE_NIL) {
reg.NegateBase &= ~(1 << i);
continue;
}
if (swz >= 4)
return GL_FALSE;
}
}
if (reg.NegateBase)
return GL_FALSE;
return GL_TRUE;
} else {
/* ALU instructions support almost everything */
if (reg.Abs)
return GL_TRUE;
relevant = 0;
for(i = 0; i < 3; ++i) {
GLuint swz = GET_SWZ(reg.Swizzle, i);
if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
relevant |= 1 << i;
relevant = 0;
for(i = 0; i < 3; ++i) {
GLuint swz = GET_SWZ(reg.Swizzle, i);
if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
relevant |= 1 << i;
}
if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant))
return GL_FALSE;
return GL_TRUE;
}
if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant))
return GL_FALSE;
return GL_TRUE;
}
/**
* Implement a non-native swizzle. This function assumes that
* is_native_swizzle returned true.
* Implement a MOV with a potentially non-native swizzle.
*
* The only thing we *cannot* do in an ALU instruction is per-component
* negation. Therefore, we split the MOV into two instructions when necessary.
*/
static void nqssadce_build_swizzle(struct nqssadce_state *s,
struct prog_dst_register dst, struct prog_src_register src)
{
/*
 * NOTE(review): this body looks like a diff view with the +/- markers
 * stripped, so statements from the pre-change and post-change versions
 * appear interleaved. The comments below describe each statement as
 * written; confirm against the real source file before relying on them.
 */
struct prog_instruction *inst;
/* negatebase[0] collects the component bits whose NegateBase bit is clear,
 * negatebase[1] the component bits whose NegateBase bit is set. */
GLuint negatebase[2] = { 0, 0 };
int i;
/* Requests a fixed two slots at s->IP (presumably the pre-change line). */
_mesa_insert_instructions(s->Program, s->IP, 2);
/* Sort each of the four components into one of the two masks. */
for(i = 0; i < 4; ++i) {
GLuint swz = GET_SWZ(src.Swizzle, i);
/* Components whose swizzle is SWIZZLE_NIL end up in neither mask. */
if (swz == SWIZZLE_NIL)
continue;
/* presumably GET_BIT yields 0 or 1, selecting the matching mask */
negatebase[GET_BIT(src.NegateBase, i)] |= 1 << i;
}
/* Requests one slot per non-empty mask: 0, 1 or 2 in total. */
_mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0));
/* inst points at the instruction slot at index s->IP. */
inst = s->Program->Instructions + s->IP;
/* MOV into dst, with dst's write mask narrowed to the bits set in
 * src.NegateBase (presumably pre-change lines). */
inst[0].Opcode = OPCODE_MOV;
inst[0].DstReg = dst;
inst[0].DstReg.WriteMask &= src.NegateBase;
inst[0].SrcReg[0] = src;
/* Visit both masks (i == 0, then i == 1) and skip any that is empty. */
for(i = 0; i <= 1; ++i) {
if (!negatebase[i])
continue;
/* MOV in the following slot, with dst's write mask narrowed to the bits
 * NOT set in src.NegateBase, then advance s->IP by two (presumably
 * pre-change lines). */
inst[1].Opcode = OPCODE_MOV;
inst[1].DstReg = dst;
inst[1].DstReg.WriteMask &= ~src.NegateBase;
inst[1].SrcReg[0] = src;
s->IP += 2;
/* MOV in the current slot. The write mask is assigned negatebase[i]
 * outright, replacing the mask copied from dst rather than ANDing with
 * it; src is copied unchanged. */
inst->Opcode = OPCODE_MOV;
inst->DstReg = dst;
inst->DstReg.WriteMask = negatebase[i];
inst->SrcReg[0] = src;
/* Step to the next slot and keep s->IP in sync with it. */
inst++;
s->IP++;
}
}
static GLuint build_dtm(GLuint depthmode)

View file

@ -265,11 +265,21 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
inst->SrcReg[0] = tmp;
break;
case OPCODE_MOV:
inst->SrcReg[1] = inst->SrcReg[0];
/* AMD say we should use CMP.
* However, when we transform
* KIL -r0;
* into
* CMP tmp, -r0, -r0, 0;
* KIL tmp;
* we get incorrect behaviour on R500 when r0 == 0.0.
* It appears that the R500 KIL hardware treats -0.0 as less
* than zero.
*/
inst->SrcReg[1].File = PROGRAM_BUILTIN;
inst->SrcReg[1].Swizzle = SWIZZLE_1111;
inst->SrcReg[2].File = PROGRAM_BUILTIN;
inst->SrcReg[2].Swizzle = SWIZZLE_0000;
inst->Opcode = OPCODE_CMP;
// TODO: disable output modifiers on R500
inst->Opcode = OPCODE_MAD;
break;
case OPCODE_MUL:
inst->SrcReg[2].File = PROGRAM_BUILTIN;