From 275beae42d4f74790b084892269041f4fcafee48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= Date: Tue, 12 Jul 2022 16:04:22 +0200 Subject: [PATCH] r300: merge MOVs into ADD using the 0 swizzle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shader-db stats with RV530: total instructions in shared programs: 169509 -> 166013 (-2.06%) instructions in affected programs: 99126 -> 95630 (-3.53%) total presub in shared programs: 10975 -> 10758 (-1.98%) presub in affected programs: 744 -> 527 (-29.17%) total temps in shared programs: 21722 -> 21649 (-0.34%) temps in affected programs: 1350 -> 1277 (-5.41%) Signed-off-by: Pavel Ondračka Reviewed-by: Filip Gawin Part-of: --- .../drivers/r300/compiler/radeon_optimize.c | 43 ++++++++++++++++--- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c index 14d8eb6bb4e..b13c8634f94 100644 --- a/src/gallium/drivers/r300/compiler/radeon_optimize.c +++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -959,14 +959,43 @@ static bool merge_movs( src.Swizzle = merge_swizzles(cur->U.I.SrcReg[0].Swizzle, inst->U.I.SrcReg[0].Swizzle); src.Negate = merge_negates(inst->U.I.SrcReg[0], cur->U.I.SrcReg[0]); - if (!c->SwizzleCaps->IsNative(RC_OPCODE_MOV, src)) - return false; - cur->U.I.DstReg.WriteMask |= orig_dst_wmask; - cur->U.I.SrcReg[0] = src; - rc_remove_instruction(inst); - return true; + if (c->SwizzleCaps->IsNative(RC_OPCODE_MOV, src)) { + cur->U.I.DstReg.WriteMask |= orig_dst_wmask; + cur->U.I.SrcReg[0] = src; + rc_remove_instruction(inst); + return true; + } } - return false; + + /* Otherwise, we can convert the MOVs into ADD. + * + * For example + * MOV temp[0].x const[0].x + * MOV temp[0].y input[0].y + * + * becomes + * ADD temp[0].xy const[0].x0 input[0].0y + */ + unsigned wmask = cur->U.I.DstReg.WriteMask | orig_dst_wmask; + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = cur->U.I.SrcReg[0]; + + src0.Swizzle = fill_swizzle(src0.Swizzle, + wmask, RC_SWIZZLE_ZERO); + src1.Swizzle = fill_swizzle(src1.Swizzle, + wmask, RC_SWIZZLE_ZERO); + if (!c->SwizzleCaps->IsNative(RC_OPCODE_ADD, src0) || + !c->SwizzleCaps->IsNative(RC_OPCODE_ADD, src1)) + return false; + + cur->U.I.DstReg.WriteMask = wmask; + cur->U.I.Opcode = RC_OPCODE_ADD; + cur->U.I.SrcReg[0] = src0; + cur->U.I.SrcReg[1] = src1; + + /* finally delete the original mov */ + rc_remove_instruction(inst); + return true; } static bool inst_combination(