mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-15 00:10:25 +01:00
draw: clean up some of the xmm register manipulation function names
This commit is contained in:
parent
dc52622fcf
commit
caadc8d944
1 changed files with 78 additions and 63 deletions
|
|
@ -190,17 +190,26 @@ static void spill( struct aos_compilation *cp, unsigned idx )
|
|||
}
|
||||
}
|
||||
|
||||
static boolean is_xmm_tmp( struct aos_compilation *cp,
|
||||
struct x86_reg reg )
|
||||
|
||||
static struct x86_reg get_xmm_writable( struct aos_compilation *cp,
|
||||
struct x86_reg reg )
|
||||
{
|
||||
return (reg.file == file_XMM &&
|
||||
cp->xmm[reg.idx].file == TGSI_FILE_NULL);
|
||||
if (reg.file != file_XMM ||
|
||||
cp->xmm[reg.idx].file != TGSI_FILE_NULL)
|
||||
{
|
||||
struct x86_reg tmp = aos_get_xmm_reg(cp);
|
||||
sse_movaps(cp->func, tmp, reg);
|
||||
reg = tmp;
|
||||
}
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
static struct x86_reg get_xmm_clone( struct aos_compilation *cp,
|
||||
struct x86_reg reg )
|
||||
static struct x86_reg get_xmm( struct aos_compilation *cp,
|
||||
struct x86_reg reg )
|
||||
{
|
||||
if (!is_xmm_tmp(cp, reg)) {
|
||||
if (reg.file != file_XMM)
|
||||
{
|
||||
struct x86_reg tmp = aos_get_xmm_reg(cp);
|
||||
sse_movaps(cp->func, tmp, reg);
|
||||
reg = tmp;
|
||||
|
|
@ -210,6 +219,9 @@ static struct x86_reg get_xmm_clone( struct aos_compilation *cp,
|
|||
}
|
||||
|
||||
|
||||
/* Allocate an empty xmm register, either as a temporary or later to
|
||||
* "adopt" as a shader reg.
|
||||
*/
|
||||
struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp )
|
||||
{
|
||||
unsigned i;
|
||||
|
|
@ -251,32 +263,11 @@ void aos_release_xmm_reg( struct aos_compilation *cp,
|
|||
cp->xmm[idx].last_used = 0;
|
||||
}
|
||||
|
||||
/* Drop the cached xmm copy, if any, of the shader register (file, idx).
 *
 * Scans the eight xmm slots for one tagged with (file, idx).  The first
 * match is spilled if it is marked dirty, then released, and the scan
 * stops.  A second loop then asserts if a matching slot is still found.
 */
static void invalidate_xmm( struct aos_compilation *cp,
|
||||
unsigned file, unsigned idx )
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
/* Invalidate any old copy of this register in XMM0-7.
|
||||
*/
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (cp->xmm[i].file == file && cp->xmm[i].idx == idx) {
|
||||
|
||||
if (cp->xmm[i].dirty)
|
||||
spill(cp, i);
|
||||
|
||||
aos_release_xmm_reg(cp, i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Sanity check: resumes from the current i, which is the slot just
 * released when the loop above hit its break, or 8 when nothing matched.
 * NOTE(review): presumably aos_release_xmm_reg clears the slot's file/idx
 * tag, otherwise the released slot itself would trip the assert - confirm.
 */
for (; i < 8; i++) {
|
||||
if (cp->xmm[i].file == file && cp->xmm[i].idx == idx) {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Mark an xmm reg as holding the current copy of a shader reg.
|
||||
*/
|
||||
void aos_adopt_xmm_reg( struct aos_compilation *cp,
|
||||
struct x86_reg reg,
|
||||
unsigned file,
|
||||
|
|
@ -290,6 +281,9 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp,
|
|||
return;
|
||||
}
|
||||
|
||||
/* If any xmm reg thinks it holds this shader reg, break the
|
||||
* illusion.
|
||||
*/
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (cp->xmm[i].file == file &&
|
||||
cp->xmm[i].idx == idx) {
|
||||
|
|
@ -304,12 +298,24 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp,
|
|||
}
|
||||
|
||||
|
||||
|
||||
/* Return a pointer to the in-memory copy of the reg, making sure it is up to date.
|
||||
*/
|
||||
static struct x86_reg aos_get_shader_reg_ptr( struct aos_compilation *cp,
|
||||
unsigned file,
|
||||
unsigned idx )
|
||||
{
|
||||
invalidate_xmm( cp, file, idx );
|
||||
unsigned i;
|
||||
|
||||
/* Ensure the in-memory copy of this reg is up-to-date
|
||||
*/
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (cp->xmm[i].file == file &&
|
||||
cp->xmm[i].idx == idx &&
|
||||
cp->xmm[i].dirty) {
|
||||
spill(cp, i);
|
||||
}
|
||||
}
|
||||
|
||||
return get_reg_ptr( cp, file, idx );
|
||||
}
|
||||
|
||||
|
|
@ -320,7 +326,26 @@ static struct x86_reg aos_get_shader_reg_ptr( struct aos_compilation *cp,
|
|||
static struct x86_reg get_dst_ptr( struct aos_compilation *cp,
|
||||
const struct tgsi_full_dst_register *dst )
|
||||
{
|
||||
return aos_get_shader_reg_ptr( cp, dst->DstRegister.File, dst->DstRegister.Index );
|
||||
unsigned file = dst->DstRegister.File;
|
||||
unsigned idx = dst->DstRegister.Index;
|
||||
unsigned i;
|
||||
|
||||
|
||||
/* Ensure in-memory copy of this reg is up-to-date and invalidate
|
||||
* any xmm copies.
|
||||
*/
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (cp->xmm[i].file == file &&
|
||||
cp->xmm[i].idx == idx)
|
||||
{
|
||||
if (cp->xmm[i].dirty)
|
||||
spill(cp, i);
|
||||
|
||||
aos_release_xmm_reg(cp, i);
|
||||
}
|
||||
}
|
||||
|
||||
return get_reg_ptr( cp, file, idx );
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -358,15 +383,7 @@ static struct x86_reg aos_get_shader_reg_xmm( struct aos_compilation *cp,
|
|||
unsigned idx )
|
||||
{
|
||||
struct x86_reg reg = aos_get_shader_reg( cp, file, idx );
|
||||
|
||||
if (reg.file != file_XMM) {
|
||||
struct x86_reg tmp = aos_get_xmm_reg(cp);
|
||||
sse_movaps(cp->func, tmp, reg);
|
||||
aos_adopt_xmm_reg( cp, tmp, file, idx, FALSE );
|
||||
reg = tmp;
|
||||
}
|
||||
|
||||
return reg;
|
||||
return get_xmm( cp, reg );
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -634,7 +651,7 @@ static void store_dest( struct aos_compilation *cp,
|
|||
|
||||
case TGSI_WRITEMASK_XYZW:
|
||||
aos_adopt_xmm_reg(cp,
|
||||
get_xmm_clone(cp, result),
|
||||
get_xmm_writable(cp, result),
|
||||
reg->DstRegister.File,
|
||||
reg->DstRegister.Index,
|
||||
TRUE);
|
||||
|
|
@ -649,15 +666,15 @@ static void store_dest( struct aos_compilation *cp,
|
|||
|
||||
switch (reg->DstRegister.WriteMask) {
|
||||
case TGSI_WRITEMASK_X:
|
||||
sse_movss(cp->func, dst, get_xmm_clone(cp, result));
|
||||
sse_movss(cp->func, dst, get_xmm(cp, result));
|
||||
break;
|
||||
|
||||
case TGSI_WRITEMASK_XY:
|
||||
sse_shufps(cp->func, dst, get_xmm_clone(cp, result), SHUF(X, Y, Z, W));
|
||||
sse_shufps(cp->func, dst, get_xmm(cp, result), SHUF(X, Y, Z, W));
|
||||
break;
|
||||
|
||||
case TGSI_WRITEMASK_ZW:
|
||||
result = get_xmm_clone(cp, result);
|
||||
result = get_xmm_writable(cp, result);
|
||||
sse_shufps(cp->func, result, dst, SHUF(X, Y, Z, W));
|
||||
dst = result;
|
||||
break;
|
||||
|
|
@ -845,7 +862,7 @@ static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_inst
|
|||
{
|
||||
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
|
||||
struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
|
||||
struct x86_reg dst = get_xmm_clone(cp, arg0);
|
||||
struct x86_reg dst = get_xmm_writable(cp, arg0);
|
||||
|
||||
sse_mulps(cp->func, dst, neg);
|
||||
sse_maxps(cp->func, dst, arg0);
|
||||
|
|
@ -858,7 +875,7 @@ static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_inst
|
|||
{
|
||||
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
|
||||
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
|
||||
struct x86_reg dst = get_xmm_clone(cp, arg0);
|
||||
struct x86_reg dst = get_xmm_writable(cp, arg0);
|
||||
|
||||
sse_addps(cp->func, dst, arg1);
|
||||
|
||||
|
|
@ -882,10 +899,9 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst
|
|||
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
|
||||
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
|
||||
struct x86_reg tmp = aos_get_xmm_reg(cp);
|
||||
struct x86_reg dst = get_xmm_clone(cp, arg0);
|
||||
struct x86_reg dst = get_xmm_writable(cp, arg0);
|
||||
|
||||
sse_mulps(cp->func, dst, arg1);
|
||||
|
||||
/* Now the hard bit: sum the first 3 values:
|
||||
*/
|
||||
sse_movhlps(cp->func, tmp, dst);
|
||||
|
|
@ -908,7 +924,7 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst
|
|||
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
|
||||
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
|
||||
struct x86_reg tmp = aos_get_xmm_reg(cp);
|
||||
struct x86_reg dst = get_xmm_clone(cp, arg0);
|
||||
struct x86_reg dst = get_xmm_writable(cp, arg0);
|
||||
|
||||
sse_mulps(cp->func, dst, arg1);
|
||||
|
||||
|
|
@ -932,7 +948,7 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst
|
|||
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
|
||||
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
|
||||
struct x86_reg tmp = aos_get_xmm_reg(cp);
|
||||
struct x86_reg dst = get_xmm_clone(cp, arg0);
|
||||
struct x86_reg dst = get_xmm_writable(cp, arg0);
|
||||
|
||||
sse_mulps(cp->func, dst, arg1);
|
||||
|
||||
|
|
@ -1366,7 +1382,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
|
|||
if (writemask != TGSI_WRITEMASK_XYZW) {
|
||||
store_dest( cp,
|
||||
&op->FullDstRegisters[0],
|
||||
get_xmm_clone( cp, result ) );
|
||||
get_xmm_writable( cp, result ) );
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
|
|
@ -1440,7 +1456,7 @@ static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_inst
|
|||
{
|
||||
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
|
||||
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
|
||||
struct x86_reg dst = get_xmm_clone(cp, arg0);
|
||||
struct x86_reg dst = get_xmm_writable(cp, arg0);
|
||||
|
||||
sse_maxps(cp->func, dst, arg1);
|
||||
|
||||
|
|
@ -1453,7 +1469,7 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst
|
|||
{
|
||||
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
|
||||
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
|
||||
struct x86_reg dst = get_xmm_clone(cp, arg0);
|
||||
struct x86_reg dst = get_xmm_writable(cp, arg0);
|
||||
|
||||
sse_minps(cp->func, dst, arg1);
|
||||
|
||||
|
|
@ -1464,7 +1480,7 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst
|
|||
static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
|
||||
{
|
||||
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
|
||||
struct x86_reg dst = get_xmm_clone(cp, arg0);
|
||||
struct x86_reg dst = get_xmm_writable(cp, arg0);
|
||||
|
||||
/* potentially nothing to do */
|
||||
|
||||
|
|
@ -1476,7 +1492,7 @@ static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_inst
|
|||
{
|
||||
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
|
||||
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
|
||||
struct x86_reg dst = get_xmm_clone(cp, arg0);
|
||||
struct x86_reg dst = get_xmm_writable(cp, arg0);
|
||||
|
||||
sse_mulps(cp->func, dst, arg1);
|
||||
|
||||
|
|
@ -1494,7 +1510,7 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst
|
|||
/* If we can't clobber old contents of arg0, get a temporary & copy
|
||||
* it there, then clobber it...
|
||||
*/
|
||||
arg0 = get_xmm_clone(cp, arg0);
|
||||
arg0 = get_xmm_writable(cp, arg0);
|
||||
|
||||
sse_mulps(cp->func, arg0, arg1);
|
||||
sse_addps(cp->func, arg0, arg2);
|
||||
|
|
@ -1562,7 +1578,7 @@ static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_inst
|
|||
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
|
||||
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
|
||||
struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
|
||||
struct x86_reg dst = get_xmm_clone(cp, arg0);
|
||||
struct x86_reg dst = get_xmm_writable(cp, arg0);
|
||||
|
||||
sse_cmpps(cp->func, dst, arg1, cc_NotLessThan);
|
||||
sse_andps(cp->func, dst, ones);
|
||||
|
|
@ -1586,7 +1602,7 @@ static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_inst
|
|||
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
|
||||
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
|
||||
struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
|
||||
struct x86_reg dst = get_xmm_clone(cp, arg0);
|
||||
struct x86_reg dst = get_xmm_writable(cp, arg0);
|
||||
|
||||
sse_cmpps(cp->func, dst, arg1, cc_LessThan);
|
||||
sse_andps(cp->func, dst, ones);
|
||||
|
|
@ -1599,7 +1615,7 @@ static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_inst
|
|||
{
|
||||
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
|
||||
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
|
||||
struct x86_reg dst = get_xmm_clone(cp, arg0);
|
||||
struct x86_reg dst = get_xmm_writable(cp, arg0);
|
||||
|
||||
sse_subps(cp->func, dst, arg1);
|
||||
|
||||
|
|
@ -1989,7 +2005,6 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
|
|||
/* decr count, loop if not zero
|
||||
*/
|
||||
x86_dec(cp.func, cp.count_ESI);
|
||||
/* x86_test(cp.func, cp.count_ESI, cp.count_ESI); */
|
||||
x86_jcc(cp.func, cc_NZ, label);
|
||||
|
||||
restore_fpu_state(&cp);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue