aco/scheduler: move clauses through RAR dependencies
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

For simplicity, we limit this feature to only one RAR-dependency per clause.
This allows to quickly correct the register demand changes that occur by
switching the kill flags.

Totals from 5861 (7.34% of 79839) affected shaders: (Navi48)
Instrs: 4891340 -> 4883789 (-0.15%); split: -0.21%, +0.06%
CodeSize: 25556612 -> 25527244 (-0.11%); split: -0.16%, +0.05%
VGPRs: 347044 -> 347140 (+0.03%); split: -0.13%, +0.16%
Latency: 32697095 -> 32642428 (-0.17%); split: -0.25%, +0.08%
InvThroughput: 4975909 -> 4975086 (-0.02%); split: -0.06%, +0.05%
VClause: 102152 -> 93852 (-8.13%); split: -8.22%, +0.10%
SClause: 101232 -> 101205 (-0.03%); split: -0.03%, +0.00%
Copies: 305189 -> 305651 (+0.15%); split: -0.56%, +0.71%
Branches: 87032 -> 87045 (+0.01%); split: -0.00%, +0.02%
VALU: 2776634 -> 2777097 (+0.02%); split: -0.06%, +0.08%
SALU: 662066 -> 660379 (-0.25%); split: -0.26%, +0.01%
VOPD: 4801 -> 4800 (-0.02%); split: +1.21%, -1.23%

Totals from 5680 (7.12% of 79825) affected shaders: (Vangogh)
MaxWaves: 111282 -> 111290 (+0.01%)
Instrs: 4955907 -> 4950709 (-0.10%); split: -0.15%, +0.04%
CodeSize: 26026264 -> 26014272 (-0.05%); split: -0.10%, +0.05%
VGPRs: 320784 -> 320776 (-0.00%); split: -0.03%, +0.03%
Latency: 35645457 -> 35584438 (-0.17%); split: -0.32%, +0.15%
InvThroughput: 8233912 -> 8236524 (+0.03%); split: -0.10%, +0.13%
VClause: 107017 -> 96804 (-9.54%); split: -9.69%, +0.15%
SClause: 98633 -> 98592 (-0.04%); split: -0.05%, +0.01%
Copies: 394041 -> 393584 (-0.12%); split: -0.52%, +0.40%
Branches: 120235 -> 120231 (-0.00%); split: -0.02%, +0.01%
VALU: 3183571 -> 3183114 (-0.01%); split: -0.06%, +0.05%
SALU: 735546 -> 734143 (-0.19%); split: -0.20%, +0.01%

Totals from 2507 (3.96% of 63370) affected shaders: (Vega10)

MaxWaves: 13643 -> 13637 (-0.04%)
Instrs: 1496453 -> 1496135 (-0.02%); split: -0.11%, +0.09%
CodeSize: 7777880 -> 7776608 (-0.02%); split: -0.09%, +0.07%
VGPRs: 134164 -> 134104 (-0.04%); split: -0.11%, +0.07%
Latency: 17465181 -> 17483075 (+0.10%); split: -0.36%, +0.47%
InvThroughput: 8830470 -> 8851751 (+0.24%); split: -0.09%, +0.33%
VClause: 42012 -> 38825 (-7.59%); split: -8.00%, +0.42%
SClause: 34586 -> 34549 (-0.11%); split: -0.12%, +0.01%
Copies: 137896 -> 137668 (-0.17%); split: -0.86%, +0.69%
VALU: 1092468 -> 1092240 (-0.02%); split: -0.11%, +0.09%
SALU: 132956 -> 132569 (-0.29%); split: -0.34%, +0.05%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38135>
This commit is contained in:
Daniel Schürmann 2025-10-29 11:54:05 +01:00 committed by Marge Bot
parent 65ba8a0e8b
commit 668259ef0b

View file

@ -94,7 +94,7 @@ struct MoveState {
/* for moving instructions before the current instruction to after it */ /* for moving instructions before the current instruction to after it */
DownwardsCursor downwards_init(int current_idx, bool improved_rar); DownwardsCursor downwards_init(int current_idx, bool improved_rar);
MoveResult downwards_check_deps(Instruction* instr); MoveResult downwards_check_deps(Instruction* instr, Temp* rar_dep = NULL);
MoveResult downwards_move(DownwardsCursor&); MoveResult downwards_move(DownwardsCursor&);
MoveResult downwards_move_clause(DownwardsCursor&); MoveResult downwards_move_clause(DownwardsCursor&);
void downwards_skip(DownwardsCursor&); void downwards_skip(DownwardsCursor&);
@ -185,7 +185,7 @@ MoveState::downwards_init(int current_idx, bool improved_rar_)
} }
MoveResult MoveResult
MoveState::downwards_check_deps(Instruction* instr) MoveState::downwards_check_deps(Instruction* instr, Temp* rar_dep)
{ {
for (const Definition& def : instr->definitions) { for (const Definition& def : instr->definitions) {
if (def.isTemp() && depends_on[def.tempId()]) if (def.isTemp() && depends_on[def.tempId()])
@ -199,9 +199,13 @@ MoveState::downwards_check_deps(Instruction* instr)
if (!improved_rar && depends_on[op.tempId()]) if (!improved_rar && depends_on[op.tempId()])
return move_fail_rar; return move_fail_rar;
if (improved_rar && rar_dependencies.count(op.tempId())) if (improved_rar && rar_dependencies.count(op.tempId())) {
// FIXME: account for difference in register pressure /* We allow for exactly one read-after-read dependency. */
return move_fail_rar; if (rar_dep && (*rar_dep == Temp() || *rar_dep == op.getTemp()))
*rar_dep = op.getTemp();
else
return move_fail_rar;
}
} }
return move_success; return move_success;
@ -284,10 +288,11 @@ MoveState::downwards_move_clause(DownwardsCursor& cursor)
/* Check if one of candidates' operands is killed by depending instruction. */ /* Check if one of candidates' operands is killed by depending instruction. */
RegisterDemand max_clause_demand; RegisterDemand max_clause_demand;
Temp rar_dep = Temp();
while (should_form_clause(block->instructions[clause_begin_idx].get(), instr)) { while (should_form_clause(block->instructions[clause_begin_idx].get(), instr)) {
Instruction* candidate = block->instructions[clause_begin_idx--].get(); Instruction* candidate = block->instructions[clause_begin_idx--].get();
MoveResult res = downwards_check_deps(candidate); MoveResult res = downwards_check_deps(candidate, &rar_dep);
if (res != move_success) if (res != move_success)
return res; return res;
@ -306,7 +311,7 @@ MoveState::downwards_move_clause(DownwardsCursor& cursor)
/* RegisterDemand changes caused by the clause. */ /* RegisterDemand changes caused by the clause. */
RegisterDemand clause_diff = clause_end_demand - clause_begin_demand; RegisterDemand clause_diff = clause_end_demand - clause_begin_demand;
/* RegisterDemand changes caused by the instructions being moved over. */ /* RegisterDemand changes caused by the instructions being moved over. */
RegisterDemand insert_diff = insert_demand - clause_end_demand; RegisterDemand insert_diff = insert_demand - clause_end_demand + rar_dep;
/* Check the new demand of the instructions being moved over. */ /* Check the new demand of the instructions being moved over. */
if (RegisterDemand(cursor.total_demand - clause_diff).exceeds(max_registers)) if (RegisterDemand(cursor.total_demand - clause_diff).exceeds(max_registers))
@ -316,11 +321,53 @@ MoveState::downwards_move_clause(DownwardsCursor& cursor)
if (RegisterDemand(max_clause_demand + insert_diff).exceeds(max_registers)) if (RegisterDemand(max_clause_demand + insert_diff).exceeds(max_registers))
return move_fail_pressure; return move_fail_pressure;
/* Update kill flags if we move over a RAR dependency:
* The changed kill flags also affect the temp register demand, so re-calculate
* that as well.
*/
int rar_index = insert_idx;
if (rar_dep != Temp()) {
for (int i = clause_end_idx; i > clause_begin_idx; i--) {
/* Subtract the RAR temp from any clause instruction after the kill. */
instr = block->instructions[i].get();
instr->register_demand -= rar_dep;
bool first = true;
for (Operand& op : instr->operands) {
if (op.isTemp() && op.getTemp() == rar_dep) {
if (first)
instr->register_demand -= get_temp_registers(instr);
op.setKill(true);
op.setFirstKill(first);
first = false;
}
}
if (first == false) {
instr->register_demand += get_temp_registers(instr);
break;
}
}
rar_index = cursor.insert_idx + rar_dependencies[rar_dep.id()];
Instruction* rar_instr = block->instructions[rar_index].get();
rar_instr->register_demand -= get_temp_registers(rar_instr);
for (Operand& op : rar_instr->operands) {
if (op.isTemp() && op.getTemp() == rar_dep && !op.isCopyKill())
op.setKill(false);
}
rar_instr->register_demand += get_temp_registers(rar_instr) + rar_dep;
}
/* Update register demand. */ /* Update register demand. */
for (int i = clause_begin_idx + 1; i <= clause_end_idx; i++) for (int i = clause_begin_idx + 1; i <= clause_end_idx; i++)
block->instructions[i]->register_demand += insert_diff; block->instructions[i]->register_demand += insert_diff;
for (int i = clause_end_idx + 1; i <= insert_idx; i++) for (int i = clause_end_idx + 1; i <= rar_index; i++)
block->instructions[i]->register_demand -= clause_diff; block->instructions[i]->register_demand -= clause_diff;
for (int i = rar_index + 1; i <= insert_idx; i++) {
/* Add the RAR temp to instructions after the original kill. */
block->instructions[i]->register_demand -= clause_diff;
block->instructions[i]->register_demand += rar_dep;
}
/* Move the clause before the memory instruction. */ /* Move the clause before the memory instruction. */
move_element(block->instructions.begin(), clause_begin_idx + 1, cursor.insert_idx_clause, move_element(block->instructions.begin(), clause_begin_idx + 1, cursor.insert_idx_clause,