From f7d02a9b5ebab5eb989f9712f407037bced5d417 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Mon, 15 Aug 2022 17:21:05 +0100 Subject: [PATCH] aco: test for one and_saveexec opcode in try_optimize_branching_sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A situation where it doesn't match is probably not possible, so this probably doesn't fix anything. Signed-off-by: Rhys Perry Reviewed-by: Georg Lehmann Reviewed-by: Timur Kristóf Part-of: --- src/amd/compiler/aco_ssa_elimination.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp index 32d1ae9957e..01f32337825 100644 --- a/src/amd/compiler/aco_ssa_elimination.cpp +++ b/src/amd/compiler/aco_ssa_elimination.cpp @@ -347,9 +347,10 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in aco_ptr<Instruction>& exec_val = block.instructions[exec_val_idx]; aco_ptr<Instruction>& exec_copy = block.instructions[exec_copy_idx]; - if (exec_copy->opcode != aco_opcode::s_and_saveexec_b32 && - exec_copy->opcode != aco_opcode::s_and_saveexec_b64 && - exec_copy->opcode != aco_opcode::p_parallelcopy) + const aco_opcode and_saveexec = ctx.program->lane_mask == s2 ? aco_opcode::s_and_saveexec_b64 + : aco_opcode::s_and_saveexec_b32; + + if (exec_copy->opcode != and_saveexec && exec_copy->opcode != aco_opcode::p_parallelcopy) return; if (exec_val->definitions.size() > 1) @@ -361,8 +362,7 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in const bool vopc = v_cmpx_op != aco_opcode::num_opcodes; /* If s_and_saveexec is used, we'll need to insert a new instruction to save the old exec. 
*/ - const bool save_original_exec = exec_copy->opcode == aco_opcode::s_and_saveexec_b32 || - exec_copy->opcode == aco_opcode::s_and_saveexec_b64; + const bool save_original_exec = exec_copy->opcode == and_saveexec; /* Position where the original exec mask copy should be inserted. */ const int save_original_exec_idx = exec_val_idx; /* The copy can be removed when it kills its operand.