From 2d0b4d36a3fa7e680687739cb4cbc1044a59192d Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Mon, 20 Feb 2023 14:37:33 +0000 Subject: [PATCH] aco: fix pathological case in LdsDirectVALUHazard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to bfd4ac4581bae0d36b2c58524131f91c7c0d27ce. No fossil-db changes. Signed-off-by: Rhys Perry Reviewed-by: Timur Kristóf Fixes: 296b4d95a3e ("aco/gfx11: workaround LdsDirectVALUHazard") Part-of: (cherry picked from commit 94abccf3ce50383a1ca0feceb6e95a065b89c19f) --- .pick_status.json | 2 +- src/amd/compiler/aco_insert_NOPs.cpp | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.pick_status.json b/.pick_status.json index e29bc070ac0..e4170c00bf1 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -7627,7 +7627,7 @@ "description": "aco: fix pathological case in LdsDirectVALUHazard", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "296b4d95a3e35b2ab62ebcbd7e3a578fcbdd5b26" }, diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index 51bd5caf98d..068ff4f4b9f 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -1044,6 +1044,9 @@ struct LdsDirectVALUHazardGlobalState { struct LdsDirectVALUHazardBlockState { unsigned num_valu = 0; bool has_trans = false; + + unsigned num_instrs = 0; + unsigned num_blocks = 0; }; bool @@ -1076,6 +1079,14 @@ handle_lds_direct_valu_hazard_instr(LdsDirectVALUHazardGlobalState& global_state if (parse_vdst_wait(instr) == 0) return true; + block_state.num_instrs++; + if (block_state.num_instrs > 256 || block_state.num_blocks > 32) { + /* Exit to limit compile times and set wait_vdst to be safe. */ + global_state.wait_vdst = + MIN2(global_state.wait_vdst, block_state.has_trans ? 0 : block_state.num_valu); + return true; + } + return block_state.num_valu >= global_state.wait_vdst; } @@ -1089,6 +1100,8 @@ handle_lds_direct_valu_hazard_block(LdsDirectVALUHazardGlobalState& global_state global_state.loop_headers_visited.insert(block->index); } + block_state.num_blocks++; + return true; }