mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-06 15:20:17 +01:00
r600: fix sin,cos functions on r600
r600 doesn't need the same normalization as r700 - instead it requires the range to be truncated to -pi..pi. I left the range truncation in effect on r700 as well, although according to the docs it has sufficient range (-512*PI, +512*PI). The instructions do not seem to be used often enough for this to cause a perf loss. Based on patches and testing by Conn Clark and Alain Perrot.
This commit is contained in:
parent
c1f33097f4
commit
d6a5f94ea4
1 changed files with 133 additions and 9 deletions
|
|
@ -2872,25 +2872,92 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
|
|||
|
||||
GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
|
||||
{
|
||||
/*
|
||||
* r600 - trunc to -PI..PI range
|
||||
* r700 - normalize by dividing by 2PI
|
||||
* see fdo bug 27901
|
||||
*/
|
||||
|
||||
int tmp;
|
||||
checkop1(pAsm);
|
||||
|
||||
tmp = gethelpr(pAsm);
|
||||
|
||||
pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
|
||||
pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
|
||||
pAsm->D.dst.op3 = 1;
|
||||
|
||||
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
|
||||
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
|
||||
pAsm->D.dst.reg = tmp;
|
||||
pAsm->D.dst.writex = 1;
|
||||
|
||||
assemble_src(pAsm, 0, -1);
|
||||
|
||||
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
|
||||
|
||||
pAsm->S[2].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
|
||||
|
||||
pAsm->D2.dst2.literal_slots = 1;
|
||||
pAsm->C[0].f = 1/(3.1415926535 * 2);
|
||||
pAsm->C[1].f = 0.0F;
|
||||
next_ins(pAsm);
|
||||
pAsm->C[1].f = 0.5f;
|
||||
|
||||
if ( GL_FALSE == next_ins(pAsm) )
|
||||
{
|
||||
return GL_FALSE;
|
||||
}
|
||||
|
||||
pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
|
||||
|
||||
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
|
||||
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
|
||||
pAsm->D.dst.reg = tmp;
|
||||
pAsm->D.dst.writex = 1;
|
||||
|
||||
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
|
||||
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
|
||||
pAsm->S[0].src.reg = tmp;
|
||||
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
|
||||
|
||||
if(( GL_FALSE == next_ins(pAsm) ))
|
||||
{
|
||||
return GL_FALSE;
|
||||
}
|
||||
pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
|
||||
pAsm->D.dst.op3 = 1;
|
||||
|
||||
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
|
||||
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
|
||||
pAsm->D.dst.reg = tmp;
|
||||
|
||||
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
|
||||
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
|
||||
pAsm->S[0].src.reg = tmp;
|
||||
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
|
||||
|
||||
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
|
||||
|
||||
pAsm->S[2].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
|
||||
|
||||
pAsm->D2.dst2.literal_slots = 1;
|
||||
|
||||
if (pAsm->bR6xx)
|
||||
{
|
||||
pAsm->C[0].f = 3.1415926535897f * 2.0f;
|
||||
pAsm->C[1].f = -3.1415926535897f;
|
||||
}
|
||||
else
|
||||
{
|
||||
pAsm->C[0].f = 1.0f;
|
||||
pAsm->C[1].f = -0.5f;
|
||||
}
|
||||
|
||||
if(( GL_FALSE == next_ins(pAsm) ))
|
||||
{
|
||||
return GL_FALSE;
|
||||
}
|
||||
|
||||
pAsm->D.dst.opcode = opcode;
|
||||
pAsm->D.dst.math = 1;
|
||||
|
|
@ -4030,22 +4097,79 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
|
|||
checkop1(pAsm);
|
||||
|
||||
tmp = gethelpr(pAsm);
|
||||
/* tmp.x = src /2*PI */
|
||||
pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
|
||||
|
||||
pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
|
||||
pAsm->D.dst.op3 = 1;
|
||||
|
||||
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
|
||||
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
|
||||
pAsm->D.dst.reg = tmp;
|
||||
pAsm->D.dst.writex = 1;
|
||||
|
||||
assemble_src(pAsm, 0, -1);
|
||||
|
||||
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
|
||||
|
||||
pAsm->S[2].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
|
||||
|
||||
pAsm->D2.dst2.literal_slots = 1;
|
||||
pAsm->C[0].f = 1/(3.1415926535 * 2);
|
||||
pAsm->C[1].f = 0.0F;
|
||||
pAsm->C[1].f = 0.5F;
|
||||
|
||||
next_ins(pAsm);
|
||||
if ( GL_FALSE == next_ins(pAsm) )
|
||||
{
|
||||
return GL_FALSE;
|
||||
}
|
||||
|
||||
pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
|
||||
|
||||
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
|
||||
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
|
||||
pAsm->D.dst.reg = tmp;
|
||||
pAsm->D.dst.writex = 1;
|
||||
|
||||
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
|
||||
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
|
||||
pAsm->S[0].src.reg = tmp;
|
||||
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
|
||||
|
||||
if(( GL_FALSE == next_ins(pAsm) ))
|
||||
{
|
||||
return GL_FALSE;
|
||||
}
|
||||
pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
|
||||
pAsm->D.dst.op3 = 1;
|
||||
|
||||
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
|
||||
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
|
||||
pAsm->D.dst.reg = tmp;
|
||||
|
||||
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
|
||||
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
|
||||
pAsm->S[0].src.reg = tmp;
|
||||
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
|
||||
|
||||
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
|
||||
|
||||
pAsm->S[2].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
|
||||
|
||||
pAsm->D2.dst2.literal_slots = 1;
|
||||
|
||||
if(pAsm->bR6xx) {
|
||||
pAsm->C[0].f = 3.1415926535897f * 2.0f;
|
||||
pAsm->C[1].f = -3.1415926535897f;
|
||||
} else {
|
||||
pAsm->C[0].f = 1.0f;
|
||||
pAsm->C[1].f = -0.5f;
|
||||
}
|
||||
|
||||
if(( GL_FALSE == next_ins(pAsm) ))
|
||||
{
|
||||
return GL_FALSE;
|
||||
}
|
||||
|
||||
// COS dst.x, a.x
|
||||
pAsm->D.dst.opcode = SQ_OP2_INST_COS;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue