r300g: implement TRUNC correctly

This fixes some integer division tests.
This commit is contained in:
Marek Olšák 2012-08-27 07:38:15 +02:00
parent f402acdbe2
commit 7f0fcf17c3
4 changed files with 42 additions and 1 deletion

View file

@ -342,6 +342,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_TRUNC,
.Name = "TRUNC",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_XPD,
.Name = "XPD",

View file

@ -175,6 +175,9 @@ typedef enum {
/** vec4 instruction: dst.c = src0.c */
RC_OPCODE_SWZ,
/** vec4 instruction: dst.c = (abs(src0.c) - fract(abs(src0.c))) * sgn(src0.c) */
RC_OPCODE_TRUNC,
/** special instruction, see ARB_fragment_program */
RC_OPCODE_XPD,

View file

@ -321,6 +321,24 @@ static void transform_FLR(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
static void transform_TRUNC(struct radeon_compiler* c,
struct rc_instruction* inst)
{
/* Definition of trunc:
* trunc(x) = (abs(x) - fract(abs(x))) * sgn(x)
*
* The multiplication by sgn(x) can be simplified using CMP:
* y * sgn(x) = (x < 0 ? -y : y)
*/
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, absolute(inst->U.I.SrcReg[0]));
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, absolute(inst->U.I.SrcReg[0]),
negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0],
negate(srcreg(RC_FILE_TEMPORARY, dst.Index)), srcreg(RC_FILE_TEMPORARY, dst.Index));
rc_remove_instruction(inst);
}
/**
* Definition of LIT (from ARB_fragment_program):
*
@ -666,6 +684,7 @@ int radeonTransformALU(
case RC_OPCODE_SSG: transform_SSG(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
case RC_OPCODE_TRUNC: transform_TRUNC(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
default:
return 0;
@ -866,6 +885,17 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
/**
 * Vertex-program lowering of TRUNC.
 *
 * Runs the generic TRUNC lowering first, then applies the r300 vertex CMP
 * transformation to the instruction that ends up directly in front of the
 * original successor.
 */
static void transform_vertex_TRUNC(struct radeon_compiler* c,
	struct rc_instruction* inst)
{
	/* Every transformation removes the instruction it is given and puts
	 * a new one in its place, so the successor serves as a fixed anchor
	 * and the instruction to process is looked up again before each step. */
	struct rc_instruction *anchor = inst->Next;
	struct rc_instruction *target;

	target = anchor->Prev;
	transform_TRUNC(c, target);

	target = anchor->Prev;
	transform_r300_vertex_CMP(c, target);
}
/**
* For use with rc_local_transform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
@ -904,6 +934,7 @@ int r300_transform_vertex_alu(
case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
case RC_OPCODE_TRUNC: transform_vertex_TRUNC(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
default:
return 0;

View file

@ -114,7 +114,7 @@ static unsigned translate_opcode(unsigned opcode)
case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL;
/* case TGSI_OPCODE_I2F: return RC_OPCODE_I2F; */
/* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */
case TGSI_OPCODE_TRUNC: return RC_OPCODE_FLR;
case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC;
/* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */
/* case TGSI_OPCODE_ISHR: return RC_OPCODE_SHR; */
/* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */