r300: remove simple duplicate ARL instructions

Removes duplicate ARL reads from the same source when the original ADDR
register is still live. This is the remaining low-hanging fruit from #7723.
It should account for most of the potential improvements and is also
trivial, as no source or destination rewrite is needed.

RV530:
total instructions in shared programs: 132447 -> 131488 (-0.72%)
instructions in affected programs: 33396 -> 32437 (-2.87%)
helped: 331
HURT: 0
total temps in shared programs: 17035 -> 17015 (-0.12%)
temps in affected programs: 361 -> 341 (-5.54%)
helped: 30
HURT: 10

RV370:
total instructions in shared programs: 83555 -> 82659 (-1.07%)
instructions in affected programs: 28310 -> 27414 (-3.16%)
helped: 312
HURT: 0
total temps in shared programs: 12418 -> 12426 (0.06%)
temps in affected programs: 302 -> 310 (2.65%)
helped: 21
HURT: 29

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Filip Gawin <filip@gawin.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22752>
This commit is contained in:
Pavel Ondračka 2023-04-26 13:38:41 +02:00 committed by Marge Bot
parent c5b3d488f9
commit 775e42e6b8

View file

@ -1331,6 +1331,49 @@ static void merge_channels(struct radeon_compiler * c, struct rc_instruction * i
}
}
/**
 * Removes duplicate ARL instructions that follow \p inst.
 *
 * Only a very trivial case is optimized for now: a later ARL that reads the
 * same register file, index and swizzle as \p inst is removed, provided that
 * nothing in between could have changed the value being loaded.
 *
 * The forward scan stops at:
 *  - any flow control instruction,
 *  - any instruction that writes a channel of the source register that the
 *    ARL actually reads,
 *  - any ARL that reads a different source.
 *
 * \param c    compiler whose instruction list is scanned and modified
 * \param inst the ARL instruction to use as the reference; it is kept
 */
static void merge_ARL(struct radeon_compiler * c, struct rc_instruction * inst)
{
	unsigned int ARL_src_reg = inst->U.I.SrcReg[0].Index;
	unsigned int ARL_src_file = inst->U.I.SrcReg[0].File;
	unsigned int ARL_src_swizzle = inst->U.I.SrcReg[0].Swizzle;
	/* Channels of the source register that the ARL reads. */
	unsigned int ARL_src_mask = rc_swizzle_to_writemask(ARL_src_swizzle);

	/* Start at the instruction after the reference ARL and never look at
	 * the list sentinel itself.
	 */
	struct rc_instruction * cur = inst->Next;
	while (cur != &c->Program.Instructions) {
		/* Remember the successor up front so that every instruction is
		 * visited exactly once, including the one that follows a
		 * removed duplicate.
		 */
		struct rc_instruction * next = cur->Next;
		const struct rc_opcode_info * opcode = rc_get_opcode_info(cur->U.I.Opcode);

		/* Keep it simple for now and stop when encountering any
		 * control flow.
		 */
		if (opcode->IsFlowControl)
			return;

		/* Stop when the original source is overwritten, i.e. when the
		 * written channels intersect the channels read by the ARL.
		 */
		if (ARL_src_reg == cur->U.I.DstReg.Index &&
		    ARL_src_file == cur->U.I.DstReg.File &&
		    (cur->U.I.DstReg.WriteMask & ARL_src_mask) != 0)
			return;

		if (cur->U.I.Opcode == RC_OPCODE_ARL) {
			/* An ARL from a different source replaces the loaded
			 * value, so later ARLs are no longer duplicates of inst.
			 */
			if (ARL_src_reg != cur->U.I.SrcReg[0].Index ||
			    ARL_src_file != cur->U.I.SrcReg[0].File ||
			    ARL_src_swizzle != cur->U.I.SrcReg[0].Swizzle)
				return;

			rc_remove_instruction(cur);
		}

		cur = next;
	}
}
void rc_optimize(struct radeon_compiler * c, void *user)
{
struct rc_instruction * inst = c->Program.Instructions.Next;
@ -1351,7 +1394,7 @@ void rc_optimize(struct radeon_compiler * c, void *user)
}
/* Merge MOVs to same source in different channels using the constant
* swizzles.
* swizzle.
*/
if (c->is_r500 || c->type == RC_VERTEX_PROGRAM) {
inst = c->Program.Instructions.Next;
@ -1384,7 +1427,16 @@ void rc_optimize(struct radeon_compiler * c, void *user)
peephole(c, cur);
}
if (!c->has_omod) {
inst = c->Program.Instructions.Next;
while (inst != &c->Program.Instructions) {
struct rc_instruction * cur = inst;
inst = inst->Next;
if (cur->U.I.Opcode == RC_OPCODE_ARL) {
merge_ARL(c, cur);
}
}
return;
}