From 31f62a6123d46311bed7a54dc10e518bbe4c5f8d Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Tue, 8 Oct 2024 20:30:15 +0200 Subject: [PATCH] aco/insert_exec: don't always reset top exec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows re-using previous temporaries when exec was restored from a Temp, rather than having to create a new copy of exec. Foz-DB Navi31: Totals from 545 (0.69% of 79395) affected shaders: Instrs: 216563 -> 215698 (-0.40%) CodeSize: 1183536 -> 1180076 (-0.29%) Latency: 1135269 -> 1135294 (+0.00%); split: -0.00%, +0.00% Copies: 11933 -> 11072 (-7.22%) SALU: 18990 -> 18129 (-4.53%) Reviewed-by: Daniel Schürmann Reviewed-by: Timur Kristóf Part-of: --- src/amd/compiler/aco_insert_exec_mask.cpp | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp index 26a06c596da..d6815305ce8 100644 --- a/src/amd/compiler/aco_insert_exec_mask.cpp +++ b/src/amd/compiler/aco_insert_exec_mask.cpp @@ -110,7 +110,6 @@ transition_to_WQM(exec_ctx& ctx, Builder bld, unsigned idx) assert(ctx.info[idx].exec.back().op.size() == bld.lm.size()); assert(ctx.info[idx].exec.back().op.isTemp()); bld.copy(Definition(exec, bld.lm), ctx.info[idx].exec.back().op); - ctx.info[idx].exec.back().op = Operand(exec, bld.lm); } void @@ -128,7 +127,6 @@ transition_to_Exact(exec_ctx& ctx, Builder bld, unsigned idx) assert(ctx.info[idx].exec.back().op.size() == bld.lm.size()); assert(ctx.info[idx].exec.back().op.isTemp()); bld.copy(Definition(exec, bld.lm), ctx.info[idx].exec.back().op); - ctx.info[idx].exec.back().op = Operand(exec, bld.lm); return; } /* otherwise, we create an exact mask and push to the stack */ @@ -366,8 +364,6 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector> Operand restore = ctx.info[idx].exec.back().op; assert(restore.size() == bld.lm.size()); bld.copy(Definition(exec, bld.lm), 
restore); - if (!restore.isConstant()) - ctx.info[idx].exec.back().op = Operand(exec, bld.lm); } return i; @@ -507,6 +503,8 @@ process_instructions(exec_ctx& ctx, Block* block, std::vectorend())); Operand restore = bld.copy(bld.def(bld.lm), Operand(exec, bld.lm)); - ctx.info[idx].exec.emplace(std::prev(ctx.info[idx].exec.end()), restore, mask); + ctx.info[idx].exec.back().op = restore; bld.reset(bld.instructions); } - ctx.info[idx].exec.back().type &= (mask_type_wqm | mask_type_exact); + uint8_t mask = ctx.info[idx].exec.back().type & (mask_type_wqm | mask_type_exact); + ctx.info[idx].exec.emplace_back(Operand(exec, bld.lm), mask); } unsigned num_exec_masks = ctx.info[idx].exec.size(); @@ -638,10 +634,8 @@ add_branch_code(exec_ctx& ctx, Block* block) need_parallelcopy = true; } - if (need_parallelcopy) { + if (need_parallelcopy) bld.copy(Definition(exec, bld.lm), ctx.info[idx].exec.back().op); - ctx.info[idx].exec.back().op = Operand(exec, bld.lm); - } bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2), Operand(exec, bld.lm), block->linear_succs[1], block->linear_succs[0]); } else if (block->kind & block_kind_uniform) {