r600: implement LOG op in compiler

This commit is contained in:
Pierre Ossman 2009-11-01 22:28:02 +01:00 committed by Alex Deucher
parent d00cbba403
commit 213ec8251c
2 changed files with 214 additions and 3 deletions

View file

@ -3037,6 +3037,217 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
{
BITS tmp1, tmp2, tmp3;
checkop1(pAsm);
tmp1 = gethelpr(pAsm);
tmp2 = gethelpr(pAsm);
tmp3 = gethelpr(pAsm);
// FIXME: The hardware can do fabs() directly on input
// elements, but the compiler doesn't have the
// capability to use that.
// MAX tmp1.x, a.x, -a.x (fabs(a.x))
pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp1;
pAsm->D.dst.writex = 1;
if( GL_FALSE == assemble_src(pAsm, 0, -1) )
{
return GL_FALSE;
}
pAsm->S[1].bits = pAsm->S[0].bits;
flipneg_PVSSRC(&(pAsm->S[1].src));
if ( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
// Entire algo:
//
// LG2 tmp2.x, tmp1.x
// FLOOR tmp3.x, tmp2.x
// MOV dst.x, tmp3.x
// ADD tmp3.x, tmp2.x, -tmp3.x
// EX2 dst.y, tmp3.x
// MOV dst.z, tmp2.x
// MOV dst.w, 1.0
// LG2 tmp2.x, tmp1.x
// FLOOR tmp3.x, tmp2.x
pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
pAsm->D.dst.math = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp2;
pAsm->D.dst.writex = 1;
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
pAsm->S[0].src.reg = tmp1;
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
noneg_PVSSRC(&(pAsm->S[0].src));
if( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp3;
pAsm->D.dst.writex = 1;
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
pAsm->S[0].src.reg = tmp2;
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
noneg_PVSSRC(&(pAsm->S[0].src));
if( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
// MOV dst.x, tmp3.x
pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
if( GL_FALSE == assemble_dst(pAsm) )
{
return GL_FALSE;
}
pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
pAsm->S[0].src.reg = tmp3;
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
noneg_PVSSRC(&(pAsm->S[0].src));
if( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
// ADD tmp3.x, tmp2.x, -tmp3.x
// EX2 dst.y, tmp3.x
pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp3;
pAsm->D.dst.writex = 1;
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
pAsm->S[0].src.reg = tmp2;
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
noneg_PVSSRC(&(pAsm->S[0].src));
setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
pAsm->S[1].src.reg = tmp3;
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
neg_PVSSRC(&(pAsm->S[1].src));
if( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
pAsm->D.dst.math = 1;
if( GL_FALSE == assemble_dst(pAsm) )
{
return GL_FALSE;
}
pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
pAsm->S[0].src.reg = tmp3;
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
noneg_PVSSRC(&(pAsm->S[0].src));
if( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
// MOV dst.z, tmp2.x
pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
if( GL_FALSE == assemble_dst(pAsm) )
{
return GL_FALSE;
}
pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
pAsm->S[0].src.reg = tmp2;
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
noneg_PVSSRC(&(pAsm->S[0].src));
if( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
// MOV dst.w 1.0
pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
if( GL_FALSE == assemble_dst(pAsm) )
{
return GL_FALSE;
}
pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
pAsm->S[0].src.reg = tmp1;
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
noneg_PVSSRC(&(pAsm->S[0].src));
if( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
return GL_TRUE;
}
GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
{
int tmp, ii;
@ -4166,10 +4377,9 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_FALSE;
break;
case OPCODE_LOG:
radeon_error("Not yet implemented instruction OPCODE_LOG \n");
//if ( GL_FALSE == assemble_BAD("LOG") )
if ( GL_FALSE == assemble_LOG(pR700AsmCode) )
return GL_FALSE;
break; // approx of LG2
break;
case OPCODE_MAD:
if ( GL_FALSE == assemble_MAD(pR700AsmCode) )

View file

@ -484,6 +484,7 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm);
GLboolean assemble_KIL(r700_AssemblerBase *pAsm);
GLboolean assemble_LG2(r700_AssemblerBase *pAsm);
GLboolean assemble_LRP(r700_AssemblerBase *pAsm);
/* Emits the instruction sequence for OPCODE_LOG; returns GL_FALSE if any
 * step fails to assemble, GL_TRUE otherwise. */
GLboolean assemble_LOG(r700_AssemblerBase *pAsm);
GLboolean assemble_MAD(r700_AssemblerBase *pAsm);
GLboolean assemble_LIT(r700_AssemblerBase *pAsm);
GLboolean assemble_MAX(r700_AssemblerBase *pAsm);