r500: Handle non-native swizzles in texture instructions

This fixes piglit's fp-kil and fp-generic/kil-swizzle tests.
This commit is contained in:
Nicolai Haehnle 2008-07-27 16:36:05 +02:00
parent 1bdf5e09a0
commit 0973d348d7
2 changed files with 79 additions and 26 deletions

View file

@ -269,44 +269,87 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
GLuint relevant; GLuint relevant;
int i; int i;
if (reg.Abs) if (opcode == OPCODE_TEX ||
opcode == OPCODE_TXB ||
opcode == OPCODE_TXP ||
opcode == OPCODE_KIL) {
if (reg.Abs)
return GL_FALSE;
if (reg.NegateAbs)
reg.NegateBase ^= 15;
if (opcode == OPCODE_KIL) {
if (reg.Swizzle != SWIZZLE_NOOP)
return GL_FALSE;
} else {
for(i = 0; i < 4; ++i) {
GLuint swz = GET_SWZ(reg.Swizzle, i);
if (swz == SWIZZLE_NIL) {
reg.NegateBase &= ~(1 << i);
continue;
}
if (swz >= 4)
return GL_FALSE;
}
}
if (reg.NegateBase)
return GL_FALSE;
return GL_TRUE; return GL_TRUE;
} else {
/* ALU instructions support almost everything */
if (reg.Abs)
return GL_TRUE;
relevant = 0; relevant = 0;
for(i = 0; i < 3; ++i) { for(i = 0; i < 3; ++i) {
GLuint swz = GET_SWZ(reg.Swizzle, i); GLuint swz = GET_SWZ(reg.Swizzle, i);
if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
relevant |= 1 << i; relevant |= 1 << i;
}
if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant))
return GL_FALSE;
return GL_TRUE;
} }
if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant))
return GL_FALSE;
return GL_TRUE;
} }
/** /**
* Implement a non-native swizzle. This function assumes that * Implement a MOV with a potentially non-native swizzle.
* is_native_swizzle returned true. *
* The only thing we *cannot* do in an ALU instruction is per-component
* negation. Therefore, we split the MOV into two instructions when necessary.
*/ */
static void nqssadce_build_swizzle(struct nqssadce_state *s, static void nqssadce_build_swizzle(struct nqssadce_state *s,
struct prog_dst_register dst, struct prog_src_register src) struct prog_dst_register dst, struct prog_src_register src)
{ {
struct prog_instruction *inst; struct prog_instruction *inst;
GLuint negatebase[2] = { 0, 0 };
int i;
_mesa_insert_instructions(s->Program, s->IP, 2); for(i = 0; i < 4; ++i) {
GLuint swz = GET_SWZ(src.Swizzle, i);
if (swz == SWIZZLE_NIL)
continue;
negatebase[GET_BIT(src.NegateBase, i)] |= 1 << i;
}
_mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0));
inst = s->Program->Instructions + s->IP; inst = s->Program->Instructions + s->IP;
inst[0].Opcode = OPCODE_MOV; for(i = 0; i <= 1; ++i) {
inst[0].DstReg = dst; if (!negatebase[i])
inst[0].DstReg.WriteMask &= src.NegateBase; continue;
inst[0].SrcReg[0] = src;
inst[1].Opcode = OPCODE_MOV; inst->Opcode = OPCODE_MOV;
inst[1].DstReg = dst; inst->DstReg = dst;
inst[1].DstReg.WriteMask &= ~src.NegateBase; inst->DstReg.WriteMask = negatebase[i];
inst[1].SrcReg[0] = src; inst->SrcReg[0] = src;
inst++;
s->IP += 2; s->IP++;
}
} }
static GLuint build_dtm(GLuint depthmode) static GLuint build_dtm(GLuint depthmode)

View file

@ -265,11 +265,21 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
inst->SrcReg[0] = tmp; inst->SrcReg[0] = tmp;
break; break;
case OPCODE_MOV: case OPCODE_MOV:
inst->SrcReg[1] = inst->SrcReg[0]; /* AMD say we should use CMP.
* However, when we transform
* KIL -r0;
* into
* CMP tmp, -r0, -r0, 0;
* KIL tmp;
* we get incorrect behaviour on R500 when r0 == 0.0.
* It appears that the R500 KIL hardware treats -0.0 as less
* than zero.
*/
inst->SrcReg[1].File = PROGRAM_BUILTIN;
inst->SrcReg[1].Swizzle = SWIZZLE_1111;
inst->SrcReg[2].File = PROGRAM_BUILTIN; inst->SrcReg[2].File = PROGRAM_BUILTIN;
inst->SrcReg[2].Swizzle = SWIZZLE_0000; inst->SrcReg[2].Swizzle = SWIZZLE_0000;
inst->Opcode = OPCODE_CMP; inst->Opcode = OPCODE_MAD;
// TODO: disable output modifiers on R500
break; break;
case OPCODE_MUL: case OPCODE_MUL:
inst->SrcReg[2].File = PROGRAM_BUILTIN; inst->SrcReg[2].File = PROGRAM_BUILTIN;