From 6c2959c0256167bc97ed338e12e0543a967f2fc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?=
Date: Fri, 20 May 2022 11:11:07 +0200
Subject: [PATCH] r300: merge simple movs with constant swizzles together
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This pass will merge instructions like these
MOV output[0].x, temp[5].x___;
MOV output[0].yzw, none._001;
into
MOV output[0].xyzw, temp[5].x001;

It is currently very careful with control flow and dependency tracking,
so there is still room for improvements.

Shader-db stats with RV530:
total instructions in shared programs: 132486 -> 132256 (-0.17%)
instructions in affected programs: 6186 -> 5956 (-3.72%)
helped: 65
HURT: 0
total temps in shared programs: 18035 -> 18014 (-0.12%)
temps in affected programs: 295 -> 274 (-7.12%)
helped: 22
HURT: 1

Signed-off-by: Pavel Ondračka
Reviewed-by: Filip Gawin
Part-of:
---
 .../drivers/r300/compiler/radeon_optimize.c   | 113 +++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c
index 5a9f4a529df..2609534187d 100644
--- a/src/gallium/drivers/r300/compiler/radeon_optimize.c
+++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c
@@ -887,6 +887,117 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
 	return 0;
 }
 
+/**
+ * Combine two source swizzles channel by channel.
+ *
+ * For each of the four channels the selector from swz1 is used, unless it
+ * is RC_SWIZZLE_UNUSED, in which case the selector from swz2 is used.
+ */
+static unsigned int merge_swizzles(unsigned int swz1, unsigned int swz2) {
+	unsigned int new_swz = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+	for (unsigned int chan = 0; chan < 4; chan++) {
+		unsigned int swz = GET_SWZ(swz1, chan);
+		if (swz != RC_SWIZZLE_UNUSED) {
+			SET_SWZ(new_swz, chan, swz);
+			continue;
+		}
+		swz = GET_SWZ(swz2, chan);
+		SET_SWZ(new_swz, chan, swz);
+	}
+	return new_swz;
+}
+
+/**
+ * Try to merge the MOV inst into a later MOV that writes other channels of
+ * the same destination register. The merge is only done when the source of
+ * at least one of the two MOVs has file RC_FILE_NONE.
+ *
+ * The forward search gives up at the first flow control instruction, when
+ * the channels written by inst are overwritten, when the destination
+ * register is read, or when the register read by inst is overwritten.
+ *
+ * Returns 1 if the MOVs were merged and inst was removed, 0 otherwise.
+ */
+static int merge_movs(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+	unsigned int orig_dst_reg = inst->U.I.DstReg.Index;
+	unsigned int orig_dst_file = inst->U.I.DstReg.File;
+	unsigned int orig_dst_wmask = inst->U.I.DstReg.WriteMask;
+	unsigned int orig_src_reg = inst->U.I.SrcReg[0].Index;
+	unsigned int orig_src_file = inst->U.I.SrcReg[0].File;
+
+	/* Stop before the list head: it only terminates the instruction
+	 * list and must not be inspected as if it were an instruction.
+	 */
+	for (struct rc_instruction * cur = inst->Next;
+	     cur != &c->Program.Instructions; cur = cur->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(cur->U.I.Opcode);
+
+		/* Keep it simple for now and stop when encountering any
+		 * control flow.
+		 */
+		if (opcode->IsFlowControl)
+			return 0;
+
+		/* Stop when the original destination is overwritten */
+		if (orig_dst_reg == cur->U.I.DstReg.Index &&
+			orig_dst_file == cur->U.I.DstReg.File &&
+			(orig_dst_wmask & cur->U.I.DstReg.WriteMask) != 0)
+			return 0;
+
+		/* Stop the search when the original instruction destination
+		 * is used as a source for anything.
+		 */
+		for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+			if (cur->U.I.SrcReg[i].File == orig_dst_file &&
+				cur->U.I.SrcReg[i].Index == orig_dst_reg)
+				return 0;
+		}
+
+		/* The merged mov keeps the saturate mode of cur, so only
+		 * consider movs whose saturate mode matches the original.
+		 */
+		if (cur->U.I.Opcode == RC_OPCODE_MOV &&
+			cur->U.I.DstReg.File == orig_dst_file &&
+			cur->U.I.DstReg.Index == orig_dst_reg &&
+			cur->U.I.SaturateMode == inst->U.I.SaturateMode &&
+			(cur->U.I.DstReg.WriteMask & orig_dst_wmask) == 0) {
+
+			/* We can merge the movs if one of them is from inline constant */
+			if (cur->U.I.SrcReg[0].File == RC_FILE_NONE ||
+				orig_src_file == RC_FILE_NONE) {
+				cur->U.I.DstReg.WriteMask |= orig_dst_wmask;
+
+				if (cur->U.I.SrcReg[0].File == RC_FILE_NONE) {
+					cur->U.I.SrcReg[0].File = orig_src_file;
+					cur->U.I.SrcReg[0].Index = orig_src_reg;
+					cur->U.I.SrcReg[0].Abs = inst->U.I.SrcReg[0].Abs;
+					cur->U.I.SrcReg[0].RelAddr = inst->U.I.SrcReg[0].RelAddr;
+				}
+				cur->U.I.SrcReg[0].Swizzle =
+					merge_swizzles(cur->U.I.SrcReg[0].Swizzle,
+							inst->U.I.SrcReg[0].Swizzle);
+
+				cur->U.I.SrcReg[0].Negate |= inst->U.I.SrcReg[0].Negate;
+
+				/* finally delete the original mov */
+				rc_remove_instruction(inst);
+
+				return 1;
+			}
+		}
+
+		/* Stop when the register read by the original mov is
+		 * overwritten, a merged mov would read the new value.
+		 */
+		if (orig_src_file != RC_FILE_NONE &&
+			cur->U.I.DstReg.File == orig_src_file &&
+			cur->U.I.DstReg.Index == orig_src_reg)
+			return 0;
+	}
+	return 0;
+}
+
 void rc_optimize(struct radeon_compiler * c, void *user)
 {
 	struct rc_instruction * inst = c->Program.Instructions.Next;
@@ -900,6 +1011,8 @@ void rc_optimize(struct radeon_compiler * c, void *user)
 			continue;
 
 		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
+			if (merge_movs(c,cur))
+				continue;
 			copy_propagate(c, cur);
 			/* cur may no longer be part of the program */
 		}