From e90cafa435f743c586dc58f6b51c4db73204b1db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= Date: Sat, 27 Aug 2022 20:16:56 +0200 Subject: [PATCH] r300: add special path for merging movs with the same source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is quite rare but still helps a few tesseract shaders and is quite straightforward. shader-db with RV530: total instructions in shared programs: 135671 -> 135646 (-0.02%) instructions in affected programs: 322 -> 297 (-7.76%) helped: 13 HURT: 0 Reviewed-by: Filip Gawin Signed-off-by: Pavel Ondračka Part-of: --- .../drivers/r300/compiler/radeon_optimize.c | 57 +++++++++++++------ 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c index 9a338275ee0..5b5d92f9f90 100644 --- a/src/gallium/drivers/r300/compiler/radeon_optimize.c +++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -929,6 +929,23 @@ static unsigned int fill_swizzle(unsigned int orig_swz, unsigned int wmask, unsi return orig_swz; } +static int have_shared_source(struct rc_instruction * inst1, struct rc_instruction * inst2) +{ + int shared_src = -1; + const struct rc_opcode_info * opcode1 = rc_get_opcode_info(inst1->U.I.Opcode); + const struct rc_opcode_info * opcode2 = rc_get_opcode_info(inst2->U.I.Opcode); + for (unsigned i = 0; i < opcode1->NumSrcRegs; i++) { + for (unsigned j = 0; j < opcode2->NumSrcRegs; j++) { + if (inst1->U.I.SrcReg[i].File == inst2->U.I.SrcReg[j].File && + inst1->U.I.SrcReg[i].Index == inst2->U.I.SrcReg[j].Index && + inst1->U.I.SrcReg[i].RelAddr == inst2->U.I.SrcReg[j].RelAddr && + inst1->U.I.SrcReg[i].Abs == inst2->U.I.SrcReg[j].Abs) + shared_src = i; + } + } + return shared_src; +} + /** * Merges two MOVs writing different channels of the same destination register * with the use of the constant swizzles. 
@@ -967,6 +984,29 @@ static bool merge_movs( } } + /* Handle the trivial case where the MOVs share a source. + * + * For example + * MOV temp[0].x const[0].x + * MOV temp[0].y const[0].z + * + * becomes + * MOV temp[0].xy const[0].xz + */ + if (have_shared_source(inst, cur) == 0) { + struct rc_src_register src = cur->U.I.SrcReg[0]; + src.Negate = merge_negates(inst->U.I.SrcReg[0], cur->U.I.SrcReg[0]); + src.Swizzle = merge_swizzles(cur->U.I.SrcReg[0].Swizzle, + inst->U.I.SrcReg[0].Swizzle); + + if (c->SwizzleCaps->IsNative(RC_OPCODE_MOV, src)) { + cur->U.I.DstReg.WriteMask |= orig_dst_wmask; + cur->U.I.SrcReg[0] = src; + rc_remove_instruction(inst); + return true; + } + } + /* Otherwise, we can convert the MOVs into ADD. * * For example @@ -998,23 +1038,6 @@ static bool merge_movs( return true; } -static int have_shared_source(struct rc_instruction * inst1, struct rc_instruction * inst2) -{ - int shared_src = -1; - const struct rc_opcode_info * opcode1 = rc_get_opcode_info(inst1->U.I.Opcode); - const struct rc_opcode_info * opcode2 = rc_get_opcode_info(inst2->U.I.Opcode); - for (unsigned i = 0; i < opcode1->NumSrcRegs; i++) { - for (unsigned j = 0; j < opcode2->NumSrcRegs; j++) { - if (inst1->U.I.SrcReg[i].File == inst2->U.I.SrcReg[j].File && - inst1->U.I.SrcReg[i].Index == inst2->U.I.SrcReg[j].Index && - inst1->U.I.SrcReg[i].RelAddr == inst2->U.I.SrcReg[j].RelAddr && - inst1->U.I.SrcReg[i].Abs == inst2->U.I.SrcReg[j].Abs) - shared_src = i; - } - } - return shared_src; -} - /** * This function will try to merge MOV and ADD/MUL instructions with the same * destination, making use of the constant swizzles.