r300: don't move position output to the end when duplicating it for WPOS

Instead, emit both outputs (the position write and its WPOS duplicate)
as soon as possible, i.e. right after the last write to the position output.

If the last write is inside a loop or a branch, emit both outputs
after the ENDLOOP or ENDIF that closes the outermost enclosing block.
This saves some temps and also allows us to potentially benefit from
R300_PVS_XYZW_VALID_INST, because right now the position output write
is always penultimate, with the WPOS output being the last.

total temps in shared programs: 14101 -> 14029 (-0.51%)
temps in affected programs: 435 -> 363 (-16.55%)
helped: 72
HURT: 0

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Filip Gawin <filip.gawin@zoho.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15321>
This commit is contained in:
Pavel Ondračka 2022-03-07 13:41:55 +01:00 committed by Marge Bot
parent 8cf10ae144
commit 5dcef1e7b8

View file

@ -143,19 +143,37 @@ void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_ou
{
unsigned tempreg = rc_find_free_temporary(c);
struct rc_instruction * inst;
struct rc_instruction * insert_pos = c->Program.Instructions.Prev;
unsigned branch_depth = 0;
unsigned loop_depth = 0;
bool emit_after_control_flow = false;
for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
loop_depth++;
if (inst->U.I.Opcode == RC_OPCODE_IF)
branch_depth++;
if ((inst->U.I.Opcode == RC_OPCODE_ENDLOOP && loop_depth--) ||
(inst->U.I.Opcode == RC_OPCODE_ENDIF && branch_depth--))
if (emit_after_control_flow && loop_depth == 0 && branch_depth == 0) {
insert_pos = inst;
emit_after_control_flow = false;
}
if (opcode->HasDstReg) {
if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst->U.I.DstReg.Index = tempreg;
insert_pos = inst;
if (loop_depth != 0 && branch_depth != 0)
emit_after_control_flow = true;
}
}
}
inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
inst = rc_insert_new_instruction(c, insert_pos);
inst->U.I.Opcode = RC_OPCODE_MOV;
inst->U.I.DstReg.File = RC_FILE_OUTPUT;
inst->U.I.DstReg.Index = output;
@ -164,7 +182,7 @@ void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_ou
inst->U.I.SrcReg[0].Index = tempreg;
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
inst = rc_insert_new_instruction(c, inst);
inst->U.I.Opcode = RC_OPCODE_MOV;
inst->U.I.DstReg.File = RC_FILE_OUTPUT;
inst->U.I.DstReg.Index = dup_output;