From 0338bb9ae84cf8b512db4eef6d6c29b887d58b35 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Fri, 4 Oct 2024 10:48:31 +0200 Subject: [PATCH] aco/ssa_elimination: also optimize branching sequence with s_and without saveexec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit insert_exec will start using this in the future. Handle it the same way, just without the path that saves exec before the v_cmpx instruction. No Foz-DB changes. Reviewed-by: Daniel Schürmann Reviewed-by: Timur Kristóf Part-of: --- src/amd/compiler/aco_ssa_elimination.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp index 94ebee7d13a..39ed8ab7d72 100644 --- a/src/amd/compiler/aco_ssa_elimination.cpp +++ b/src/amd/compiler/aco_ssa_elimination.cpp @@ -337,7 +337,15 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in const aco_opcode and_saveexec = ctx.program->lane_mask == s2 ? aco_opcode::s_and_saveexec_b64 : aco_opcode::s_and_saveexec_b32; - if (exec_copy->opcode != and_saveexec && exec_copy->opcode != aco_opcode::p_parallelcopy) + const aco_opcode s_and = + ctx.program->lane_mask == s2 ? aco_opcode::s_and_b64 : aco_opcode::s_and_b32; + + if (exec_copy->opcode != and_saveexec && exec_copy->opcode != aco_opcode::p_parallelcopy && + (exec_copy->opcode != s_and || exec_copy->operands[1].physReg() != exec)) + return; + + /* The SCC def of s_and/s_and_saveexec must be unused. */ + if (exec_copy->opcode != aco_opcode::p_parallelcopy && !exec_copy->definitions[1].isKill()) return; /* Only allow SALU with multiple definitions. */