From b872ff6ef28bc44ac0f7aa5f963a273e40c79a61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?=
Date: Tue, 18 Feb 2025 10:31:04 +0100
Subject: [PATCH] aco/insert_exec_mask: if applicable, use s_wqm to restore exec after divergent CF

Totals from 4740 (5.97% of 79377) affected shaders: (Navi31)

Instrs: 6273963 -> 6273410 (-0.01%); split: -0.01%, +0.00%
CodeSize: 34306560 -> 34304284 (-0.01%); split: -0.01%, +0.00%
SpillSGPRs: 1793 -> 1797 (+0.22%); split: -0.11%, +0.33%
Latency: 62599300 -> 62598714 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 9117199 -> 9117189 (-0.00%); split: -0.00%, +0.00%
SClause: 223548 -> 223529 (-0.01%); split: -0.02%, +0.01%
Copies: 464248 -> 454711 (-2.05%); split: -2.06%, +0.00%
Branches: 161446 -> 161443 (-0.00%); split: -0.00%, +0.00%
PreSGPRs: 226278 -> 225608 (-0.30%)
VALU: 3793235 -> 3793244 (+0.00%); split: -0.00%, +0.00%
SALU: 606184 -> 605759 (-0.07%); split: -0.08%, +0.01%

Part-of:
---
 src/amd/compiler/aco_insert_exec_mask.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index dde79f35a7e..831f6a0e04a 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -362,14 +362,19 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
    }
 
    if (ctx.handle_wqm) {
-      /* End WQM handling if not needed anymore */
       if (block->kind & block_kind_top_level && ctx.info[idx].exec.size() == 2) {
+         /* End WQM handling if not needed anymore */
          if (block->instructions[i]->opcode == aco_opcode::p_end_wqm) {
             ctx.info[idx].exec.back().type |= mask_type_global;
             transition_to_Exact(ctx, bld, idx);
             ctx.handle_wqm = false;
             restore_exec = false;
             i++;
+         } else if (restore_exec && ctx.info[idx].exec[1].type & mask_type_global) {
+            /* Use s_wqm to restore exec after divergent CF in order to disable dead quads. */
+            bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), bld.def(s1, scc),
+                     ctx.info[idx].exec[0].op);
+            restore_exec = false;
          }
       }
    }