mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
r600: fix sin,cos functions on r600
r600 doesn't need the same normalization as r700 - instead it requires the
range to be truncated to -pi..pi
I left the range truncation in effect on r700 as well, although according to the docs
it has sufficient range (-512*PI, +512*PI). The instructions seem
to be used too rarely for this to cause a noticeable perf loss
Based on patches and testing by Conn Clark and Alain Perrot
(cherry picked from commit d6a5f94ea4)
This commit is contained in:
parent
aac05a8580
commit
886019125e
1 changed files with 133 additions and 9 deletions
|
|
@ -2872,25 +2872,92 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
|
|||
|
||||
GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
|
||||
{
|
||||
/*
|
||||
* r600 - trunc to -PI..PI range
|
||||
* r700 - normalize by dividing by 2PI
|
||||
* see fdo bug 27901
|
||||
*/
|
||||
|
||||
int tmp;
|
||||
checkop1(pAsm);
|
||||
|
||||
tmp = gethelpr(pAsm);
|
||||
|
||||
pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
|
||||
pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
|
||||
pAsm->D.dst.op3 = 1;
|
||||
|
||||
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
|
||||
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
|
||||
pAsm->D.dst.reg = tmp;
|
||||
pAsm->D.dst.writex = 1;
|
||||
|
||||
assemble_src(pAsm, 0, -1);
|
||||
|
||||
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
|
||||
|
||||
pAsm->S[2].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
|
||||
|
||||
pAsm->D2.dst2.literal_slots = 1;
|
||||
pAsm->C[0].f = 1/(3.1415926535 * 2);
|
||||
pAsm->C[1].f = 0.0F;
|
||||
next_ins(pAsm);
|
||||
pAsm->C[1].f = 0.5f;
|
||||
|
||||
if ( GL_FALSE == next_ins(pAsm) )
|
||||
{
|
||||
return GL_FALSE;
|
||||
}
|
||||
|
||||
pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
|
||||
|
||||
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
|
||||
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
|
||||
pAsm->D.dst.reg = tmp;
|
||||
pAsm->D.dst.writex = 1;
|
||||
|
||||
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
|
||||
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
|
||||
pAsm->S[0].src.reg = tmp;
|
||||
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
|
||||
|
||||
if(( GL_FALSE == next_ins(pAsm) ))
|
||||
{
|
||||
return GL_FALSE;
|
||||
}
|
||||
pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
|
||||
pAsm->D.dst.op3 = 1;
|
||||
|
||||
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
|
||||
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
|
||||
pAsm->D.dst.reg = tmp;
|
||||
|
||||
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
|
||||
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
|
||||
pAsm->S[0].src.reg = tmp;
|
||||
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
|
||||
|
||||
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
|
||||
|
||||
pAsm->S[2].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
|
||||
|
||||
pAsm->D2.dst2.literal_slots = 1;
|
||||
|
||||
if (pAsm->bR6xx)
|
||||
{
|
||||
pAsm->C[0].f = 3.1415926535897f * 2.0f;
|
||||
pAsm->C[1].f = -3.1415926535897f;
|
||||
}
|
||||
else
|
||||
{
|
||||
pAsm->C[0].f = 1.0f;
|
||||
pAsm->C[1].f = -0.5f;
|
||||
}
|
||||
|
||||
if(( GL_FALSE == next_ins(pAsm) ))
|
||||
{
|
||||
return GL_FALSE;
|
||||
}
|
||||
|
||||
pAsm->D.dst.opcode = opcode;
|
||||
pAsm->D.dst.math = 1;
|
||||
|
|
@ -4030,22 +4097,79 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
|
|||
checkop1(pAsm);
|
||||
|
||||
tmp = gethelpr(pAsm);
|
||||
/* tmp.x = src /2*PI */
|
||||
pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
|
||||
|
||||
pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
|
||||
pAsm->D.dst.op3 = 1;
|
||||
|
||||
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
|
||||
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
|
||||
pAsm->D.dst.reg = tmp;
|
||||
pAsm->D.dst.writex = 1;
|
||||
|
||||
assemble_src(pAsm, 0, -1);
|
||||
|
||||
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
|
||||
|
||||
pAsm->S[2].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
|
||||
|
||||
pAsm->D2.dst2.literal_slots = 1;
|
||||
pAsm->C[0].f = 1/(3.1415926535 * 2);
|
||||
pAsm->C[1].f = 0.0F;
|
||||
pAsm->C[1].f = 0.5F;
|
||||
|
||||
next_ins(pAsm);
|
||||
if ( GL_FALSE == next_ins(pAsm) )
|
||||
{
|
||||
return GL_FALSE;
|
||||
}
|
||||
|
||||
pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
|
||||
|
||||
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
|
||||
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
|
||||
pAsm->D.dst.reg = tmp;
|
||||
pAsm->D.dst.writex = 1;
|
||||
|
||||
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
|
||||
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
|
||||
pAsm->S[0].src.reg = tmp;
|
||||
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
|
||||
|
||||
if(( GL_FALSE == next_ins(pAsm) ))
|
||||
{
|
||||
return GL_FALSE;
|
||||
}
|
||||
pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
|
||||
pAsm->D.dst.op3 = 1;
|
||||
|
||||
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
|
||||
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
|
||||
pAsm->D.dst.reg = tmp;
|
||||
|
||||
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
|
||||
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
|
||||
pAsm->S[0].src.reg = tmp;
|
||||
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
|
||||
|
||||
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
|
||||
|
||||
pAsm->S[2].src.rtype = SRC_REC_LITERAL;
|
||||
setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
|
||||
|
||||
pAsm->D2.dst2.literal_slots = 1;
|
||||
|
||||
if(pAsm->bR6xx) {
|
||||
pAsm->C[0].f = 3.1415926535897f * 2.0f;
|
||||
pAsm->C[1].f = -3.1415926535897f;
|
||||
} else {
|
||||
pAsm->C[0].f = 1.0f;
|
||||
pAsm->C[1].f = -0.5f;
|
||||
}
|
||||
|
||||
if(( GL_FALSE == next_ins(pAsm) ))
|
||||
{
|
||||
return GL_FALSE;
|
||||
}
|
||||
|
||||
// COS dst.x, a.x
|
||||
pAsm->D.dst.opcode = SQ_OP2_INST_COS;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue