From 410eff4d2f3e37d2581d35eb11c82ee1355481f3 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Sun, 14 Aug 2022 13:56:30 +0200
Subject: [PATCH] aco: Fix optimizing branching sequence with s_and_saveexec.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This optimization was broken for two reasons:
- s_and_saveexec has two operands, the copy value and exec
- s_and_saveexec has an exec read, so exec_write_used will always be true
  before we find branch_exec_val_idx

Foz-DB Navi21:
Totals from 31453 (23.31% of 134913) affected shaders:
CodeSize: 204831260 -> 204831156 (-0.00%)
Instrs: 38157117 -> 38157091 (-0.00%)
Latency: 533708882 -> 531211721 (-0.47%); split: -0.47%, +0.00%
InvThroughput: 107088408 -> 106719188 (-0.34%); split: -0.35%, +0.00%
Copies: 2326179 -> 2502490 (+7.58%)

Signed-off-by: Georg Lehmann
Reviewed-by: Timur Kristóf
Part-of:
---
 src/amd/compiler/aco_ssa_elimination.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp
index f82058a6c44..2d8c3a10dfc 100644
--- a/src/amd/compiler/aco_ssa_elimination.cpp
+++ b/src/amd/compiler/aco_ssa_elimination.cpp
@@ -544,7 +544,7 @@ eliminate_useless_exec_writes_in_block(ssa_elimination_ctx& ctx, Block& block)

       /* For a newly encountered exec write, clear the used flag. */
       if (writes_exec) {
-         if (!logical_end_found && branch_reads_exec && instr->operands.size() == 1) {
+         if (!logical_end_found && branch_reads_exec && instr->operands.size()) {
             /* We are in a branch that jumps according to exec.
              * We just found the instruction that copies to exec before the branch.
              */
@@ -560,13 +560,16 @@ eliminate_useless_exec_writes_in_block(ssa_elimination_ctx& ctx, Block& block)
          }

          exec_write_used = false;
-      }
-
-      if (branch_exec_tempid && !exec_write_used && instr->definitions.size() &&
-          instr->definitions[0].tempId() == branch_exec_tempid) {
+      } else if (branch_exec_tempid && instr->definitions.size() &&
+                 instr->definitions[0].tempId() == branch_exec_tempid) {
          /* We just found the instruction that produces the exec mask that is copied. */
          assert(branch_exec_val_idx == -1);
          branch_exec_val_idx = i;
+      } else if (branch_exec_tempid && branch_exec_val_idx == -1 && needs_exec) {
+         /* There is an instruction that needs the original exec mask before
+          * branch_exec_val_idx was found, so we can't optimize the branching sequence. */
+         branch_exec_copy_idx = -1;
+         branch_exec_tempid = 0;
       }

       /* If the current instruction needs exec, mark it as used. */