r300: don't reuse destination registers when lowering

Regalloc can sort it out later. No significant change is shader-db, the
one instruction reduction is likely because some optimize pass can
actually work better when we are closer to ssa-like form.

RV530:
total instructions in shared programs: 133718 -> 133717 (<.01%)
instructions in affected programs: 47 -> 46 (-2.13%)
helped: 1
HURT: 0

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Filip Gawin <filip@gawin.net>
Tested-by: Filip Gawin <filip@gawin.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19853>
This commit is contained in:
Pavel Ondračka 2022-11-16 22:09:50 +01:00 committed by Marge Bot
parent b33845cf24
commit 90df5c3b5f

View file

@ -195,35 +195,10 @@ static struct rc_src_register swizzle_wwww(struct rc_src_register reg)
return swizzle_smear(reg, RC_SWIZZLE_W);
}
static int is_dst_safe_to_reuse(struct rc_instruction *inst)
{
const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
unsigned i;
assert(info->HasDstReg);
if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
return 0;
for (i = 0; i < info->NumSrcRegs; i++) {
if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index)
return 0;
}
return 1;
}
static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c,
static struct rc_dst_register new_dst_reg(struct radeon_compiler *c,
struct rc_instruction *inst)
{
unsigned tmp;
if (is_dst_safe_to_reuse(inst))
tmp = inst->U.I.DstReg.Index;
else
tmp = rc_find_free_temporary(c);
unsigned tmp = rc_find_free_temporary(c);
return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask);
}
@ -240,7 +215,7 @@ static void transform_CEIL(struct radeon_compiler* c,
* ceil(x) = x+frac(-x)
*/
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
struct rc_dst_register dst = new_dst_reg(c, inst);
emit1(c, inst->Prev, RC_OPCODE_FRC, NULL, dst, negate(inst->U.I.SrcReg[0]));
emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg,
inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index));
@ -278,7 +253,7 @@ static void transform_DST(struct radeon_compiler* c,
static void transform_FLR(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
struct rc_dst_register dst = new_dst_reg(c, inst);
emit1(c, inst->Prev, RC_OPCODE_FRC, NULL, dst, inst->U.I.SrcReg[0]);
emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg,
inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
@ -311,7 +286,7 @@ static void transform_TRUNC(struct radeon_compiler* c,
abs = srcregswz(RC_FILE_TEMPORARY, tmp, inst->U.I.SrcReg[0].Swizzle);
}
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
struct rc_dst_register dst = new_dst_reg(c, inst);
emit1(c, inst->Prev, RC_OPCODE_FRC, NULL, dst, abs);
emit2(c, inst->Prev, RC_OPCODE_ADD, NULL, dst, abs,
negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
@ -407,7 +382,7 @@ static void transform_LIT(struct radeon_compiler* c,
static void transform_LRP(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
struct rc_dst_register dst = new_dst_reg(c, inst);
emit3(c, inst->Prev, RC_OPCODE_MAD, NULL,
dst,
@ -422,7 +397,7 @@ static void transform_LRP(struct radeon_compiler* c,
static void transform_POW(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register tempdst = try_to_reuse_dst(c, inst);
struct rc_dst_register tempdst = new_dst_reg(c, inst);
struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index);
tempdst.WriteMask = RC_MASK_W;
tempsrc.Swizzle = RC_SWIZZLE_WWWW;
@ -480,7 +455,7 @@ static void transform_RSQ(struct radeon_compiler* c,
static void transform_SEQ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
struct rc_dst_register dst = new_dst_reg(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, NULL, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
@ -492,7 +467,7 @@ static void transform_SEQ(struct radeon_compiler* c,
static void transform_SGE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
struct rc_dst_register dst = new_dst_reg(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, NULL, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
@ -504,7 +479,7 @@ static void transform_SGE(struct radeon_compiler* c,
static void transform_SGT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
struct rc_dst_register dst = new_dst_reg(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, NULL, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
@ -516,7 +491,7 @@ static void transform_SGT(struct radeon_compiler* c,
static void transform_SLE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
struct rc_dst_register dst = new_dst_reg(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, NULL, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
@ -528,7 +503,7 @@ static void transform_SLE(struct radeon_compiler* c,
static void transform_SLT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
struct rc_dst_register dst = new_dst_reg(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, NULL, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
@ -540,7 +515,7 @@ static void transform_SLT(struct radeon_compiler* c,
static void transform_SNE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
struct rc_dst_register dst = new_dst_reg(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, NULL, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
@ -562,7 +537,7 @@ static void transform_SSG(struct radeon_compiler* c,
unsigned tmp1;
/* 0 < x */
dst0 = try_to_reuse_dst(c, inst);
dst0 = new_dst_reg(c, inst);
emit3(c, inst->Prev, RC_OPCODE_CMP, NULL,
dst0,
negate(inst->U.I.SrcReg[0]),
@ -650,7 +625,7 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
* LRP dst, tmp0, src1, src2
*
* Yes, I know, I'm a mad scientist. ~ C. & M. */
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
struct rc_dst_register dst = new_dst_reg(c, inst);
/* SLT tmp0, src0, 0.0 */
emit2(c, inst->Prev, RC_OPCODE_SLT, NULL,
@ -692,7 +667,7 @@ static void transform_r300_vertex_DP3(struct radeon_compiler* c,
static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
struct rc_dst_register dst = new_dst_reg(c, inst);
unsigned constant_swizzle;
int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
0.0000000000000000001,
@ -718,7 +693,7 @@ static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
{
/* x = y <==> x >= y && y >= x */
/* x <= y */
struct rc_dst_register dst0 = try_to_reuse_dst(c, inst);
struct rc_dst_register dst0 = new_dst_reg(c, inst);
emit2(c, inst->Prev, RC_OPCODE_SGE, NULL,
dst0,
inst->U.I.SrcReg[0],
@ -745,7 +720,7 @@ static void transform_r300_vertex_SNE(struct radeon_compiler *c,
{
/* x != y <==> x < y || y < x */
/* x < y */
struct rc_dst_register dst0 = try_to_reuse_dst(c, inst);
struct rc_dst_register dst0 = new_dst_reg(c, inst);
emit2(c, inst->Prev, RC_OPCODE_SLT, NULL,
dst0,
inst->U.I.SrcReg[0],
@ -798,7 +773,7 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c,
unsigned tmp1;
/* 0 < x */
dst0 = try_to_reuse_dst(c, inst);
dst0 = new_dst_reg(c, inst);
emit2(c, inst->Prev, RC_OPCODE_SLT, NULL,
dst0,
builtin_zero,