From fb6b95517eaeb777ccb82102a655f662933a84ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Thu, 31 Jul 2025 09:49:26 +0200 Subject: [PATCH] aco/scheduler: check dependencies of entire clause upfront and bail if any instruction of the clause can't be moved. Totals from 4310 (5.40% of 79839) affected shaders: MaxWaves: 115826 -> 115834 (+0.01%) Instrs: 6256436 -> 6257599 (+0.02%); split: -0.05%, +0.07% CodeSize: 32816488 -> 32820768 (+0.01%); split: -0.04%, +0.05% VGPRs: 260184 -> 260172 (-0.00%) Latency: 41207213 -> 41052150 (-0.38%); split: -0.45%, +0.07% InvThroughput: 6822608 -> 6815208 (-0.11%); split: -0.14%, +0.03% VClause: 148412 -> 147133 (-0.86%); split: -1.03%, +0.17% SClause: 120854 -> 120856 (+0.00%); split: -0.01%, +0.01% Copies: 425910 -> 427276 (+0.32%); split: -0.25%, +0.57% VALU: 3572293 -> 3573647 (+0.04%); split: -0.03%, +0.07% VOPD: 2803 -> 2816 (+0.46%) Part-of: --- src/amd/compiler/aco_scheduler.cpp | 46 +++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index 52436cd9e2d..cd753a54866 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -200,30 +200,48 @@ MoveState::downwards_init(int current_idx, bool improved_rar_, bool may_form_cla return cursor; } +bool +check_dependencies(Instruction* instr, std::vector<bool>& def_dep, std::vector<bool>& op_dep) +{ + for (const Definition& def : instr->definitions) { + if (def.isTemp() && def_dep[def.tempId()]) + return true; + } + for (const Operand& op : instr->operands) { + if (op.isTemp() && op_dep[op.tempId()]) { + // FIXME: account for difference in register pressure + return true; + } + } + return false; +} + /* If add_to_clause is true, the current clause is extended by moving the * instruction at source_idx in front of the clause. 
Otherwise, the instruction * is moved past the end of the clause without extending it */ MoveResult MoveState::downwards_move(DownwardsCursor& cursor, bool add_to_clause) { - aco_ptr<Instruction>& instr = block->instructions[cursor.source_idx]; - - for (const Definition& def : instr->definitions) - if (def.isTemp() && depends_on[def.tempId()]) - return move_fail_ssa; + aco_ptr<Instruction>& candidate = block->instructions[cursor.source_idx]; /* check if one of candidate's operands is killed by depending instruction */ - std::vector<bool>& RAR_deps = - improved_rar ? (add_to_clause ? RAR_dependencies_clause : RAR_dependencies) : depends_on; - for (const Operand& op : instr->operands) { - if (op.isTemp() && RAR_deps[op.tempId()]) { - // FIXME: account for difference in register pressure - return move_fail_rar; + if (add_to_clause) { + assert(improved_rar); + aco_ptr<Instruction>& instr = block->instructions[cursor.insert_idx_clause]; + int i = cursor.source_idx; + while (should_form_clause(block->instructions[i].get(), instr.get())) { + if (check_dependencies(block->instructions[i].get(), depends_on, RAR_dependencies_clause)) + return move_fail_ssa; + i--; } + } else { + std::vector<bool>& RAR_deps = improved_rar ? 
RAR_dependencies : depends_on; + if (check_dependencies(candidate.get(), depends_on, RAR_deps)) + return move_fail_ssa; } if (add_to_clause) { - for (const Operand& op : instr->operands) { + for (const Operand& op : candidate->operands) { if (op.isTemp()) { depends_on[op.tempId()] = true; if (op.isFirstKill()) @@ -239,12 +257,12 @@ MoveState::downwards_move(DownwardsCursor& cursor, bool add_to_clause) } /* Check the new demand of the instructions being moved over */ - const RegisterDemand candidate_diff = get_live_changes(instr.get()); + const RegisterDemand candidate_diff = get_live_changes(candidate.get()); if (RegisterDemand(register_pressure - candidate_diff).exceeds(max_registers)) return move_fail_pressure; /* New demand for the moved instruction */ - const RegisterDemand temp = get_temp_registers(instr.get()); + const RegisterDemand temp = get_temp_registers(candidate.get()); const RegisterDemand insert_demand = add_to_clause ? cursor.insert_demand_clause : cursor.insert_demand; const RegisterDemand new_demand = insert_demand + temp;