From fea1fc4b495eaecba0060ef3d8f5efc561a42c8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Mon, 14 Nov 2022 16:17:25 +0100 Subject: [PATCH] aco: fix reset_block_regs() in postRA-optimizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accidentally, we used the loop counter over the predecessor list as the block index, instead of the predecessor block index it selects (preds[i]). Totals from 8496 (6.30% of 134913) affected shaders: (GFX10.3) CodeSize: 64070724 -> 64022516 (-0.08%); split: -0.08%, +0.00% Instrs: 11932750 -> 11920698 (-0.10%); split: -0.10%, +0.00% Latency: 144040266 -> 144017062 (-0.02%); split: -0.02%, +0.00% InvThroughput: 29327735 -> 29326421 (-0.00%); split: -0.00%, +0.00% Fossil DB stats on Rembrandt (RDNA2): Totals from 4488 (3.33% of 134906) affected shaders: CodeSize: 42759736 -> 42735392 (-0.06%); split: -0.06%, +0.00% Instrs: 7960522 -> 7954436 (-0.08%); split: -0.08%, +0.00% Latency: 96192647 -> 96172571 (-0.02%); split: -0.02%, +0.00% InvThroughput: 19313576 -> 19312575 (-0.01%); split: -0.01%, +0.00% Fixes: 75967a4814be7988afc20e59bac4b48bafacab00 ('aco/optimizer_postRA: Speed up reset_block() with predecessors.') Reviewed-by: Timur Kristóf Part-of: (cherry picked from commit 8f4eccb13854d16aa604d6130fdb892e1cde9250) --- .pick_status.json | 2 +- src/amd/compiler/aco_optimizer_postRA.cpp | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index f56a5e42354..d3b85bd81c0 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3775,7 +3775,7 @@ "description": "aco: fix reset_block_regs() in postRA-optimizer", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "75967a4814be7988afc20e59bac4b48bafacab00" }, diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp index 8ced375dd63..59c0d12548d 100644 --- a/src/amd/compiler/aco_optimizer_postRA.cpp +++ 
b/src/amd/compiler/aco_optimizer_postRA.cpp @@ -89,13 +89,14 @@ struct pr_opt_ctx { /* Mark overwritten if it doesn't match with other predecessors. */ const unsigned until_reg = min_reg + num_regs; - for (unsigned pred = 1; pred < num_preds; ++pred) { - for (unsigned i = min_reg; i < until_reg; ++i) { - Idx& idx = instr_idx_by_regs[block_index][i]; + for (unsigned i = 1; i < num_preds; ++i) { + unsigned pred = preds[i]; + for (unsigned reg = min_reg; reg < until_reg; ++reg) { + Idx& idx = instr_idx_by_regs[block_index][reg]; if (idx == overwritten_untrackable) continue; - if (idx != instr_idx_by_regs[pred][i]) + if (idx != instr_idx_by_regs[pred][reg]) idx = overwritten_untrackable; } }