mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 11:40:10 +01:00
aco/scheduler: move clauses through RAR dependencies
For simplicity, we limit this feature to only one RAR dependency per clause. This allows us to quickly correct the register demand changes that occur when the kill flags are switched.

Totals from 5861 (7.34% of 79839) affected shaders: (Navi48)
Instrs: 4891340 -> 4883789 (-0.15%); split: -0.21%, +0.06%
CodeSize: 25556612 -> 25527244 (-0.11%); split: -0.16%, +0.05%
VGPRs: 347044 -> 347140 (+0.03%); split: -0.13%, +0.16%
Latency: 32697095 -> 32642428 (-0.17%); split: -0.25%, +0.08%
InvThroughput: 4975909 -> 4975086 (-0.02%); split: -0.06%, +0.05%
VClause: 102152 -> 93852 (-8.13%); split: -8.22%, +0.10%
SClause: 101232 -> 101205 (-0.03%); split: -0.03%, +0.00%
Copies: 305189 -> 305651 (+0.15%); split: -0.56%, +0.71%
Branches: 87032 -> 87045 (+0.01%); split: -0.00%, +0.02%
VALU: 2776634 -> 2777097 (+0.02%); split: -0.06%, +0.08%
SALU: 662066 -> 660379 (-0.25%); split: -0.26%, +0.01%
VOPD: 4801 -> 4800 (-0.02%); split: +1.21%, -1.23%

Totals from 5680 (7.12% of 79825) affected shaders: (Vangogh)
MaxWaves: 111282 -> 111290 (+0.01%)
Instrs: 4955907 -> 4950709 (-0.10%); split: -0.15%, +0.04%
CodeSize: 26026264 -> 26014272 (-0.05%); split: -0.10%, +0.05%
VGPRs: 320784 -> 320776 (-0.00%); split: -0.03%, +0.03%
Latency: 35645457 -> 35584438 (-0.17%); split: -0.32%, +0.15%
InvThroughput: 8233912 -> 8236524 (+0.03%); split: -0.10%, +0.13%
VClause: 107017 -> 96804 (-9.54%); split: -9.69%, +0.15%
SClause: 98633 -> 98592 (-0.04%); split: -0.05%, +0.01%
Copies: 394041 -> 393584 (-0.12%); split: -0.52%, +0.40%
Branches: 120235 -> 120231 (-0.00%); split: -0.02%, +0.01%
VALU: 3183571 -> 3183114 (-0.01%); split: -0.06%, +0.05%
SALU: 735546 -> 734143 (-0.19%); split: -0.20%, +0.01%

Totals from 2507 (3.96% of 63370) affected shaders: (Vega10)
MaxWaves: 13643 -> 13637 (-0.04%)
Instrs: 1496453 -> 1496135 (-0.02%); split: -0.11%, +0.09%
CodeSize: 7777880 -> 7776608 (-0.02%); split: -0.09%, +0.07%
VGPRs: 134164 -> 134104 (-0.04%); split: -0.11%, +0.07%
Latency: 17465181 -> 17483075 (+0.10%); split: -0.36%, +0.47%
InvThroughput: 8830470 -> 8851751 (+0.24%); split: -0.09%, +0.33%
VClause: 42012 -> 38825 (-7.59%); split: -8.00%, +0.42%
SClause: 34586 -> 34549 (-0.11%); split: -0.12%, +0.01%
Copies: 137896 -> 137668 (-0.17%); split: -0.86%, +0.69%
VALU: 1092468 -> 1092240 (-0.02%); split: -0.11%, +0.09%
SALU: 132956 -> 132569 (-0.29%); split: -0.34%, +0.05%

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38135>
This commit is contained in:
parent
65ba8a0e8b
commit
668259ef0b
1 changed file with 55 additions and 8 deletions
|
|
@ -94,7 +94,7 @@ struct MoveState {
|
||||||
|
|
||||||
/* for moving instructions before the current instruction to after it */
|
/* for moving instructions before the current instruction to after it */
|
||||||
DownwardsCursor downwards_init(int current_idx, bool improved_rar);
|
DownwardsCursor downwards_init(int current_idx, bool improved_rar);
|
||||||
MoveResult downwards_check_deps(Instruction* instr);
|
MoveResult downwards_check_deps(Instruction* instr, Temp* rar_dep = NULL);
|
||||||
MoveResult downwards_move(DownwardsCursor&);
|
MoveResult downwards_move(DownwardsCursor&);
|
||||||
MoveResult downwards_move_clause(DownwardsCursor&);
|
MoveResult downwards_move_clause(DownwardsCursor&);
|
||||||
void downwards_skip(DownwardsCursor&);
|
void downwards_skip(DownwardsCursor&);
|
||||||
|
|
@ -185,7 +185,7 @@ MoveState::downwards_init(int current_idx, bool improved_rar_)
|
||||||
}
|
}
|
||||||
|
|
||||||
MoveResult
|
MoveResult
|
||||||
MoveState::downwards_check_deps(Instruction* instr)
|
MoveState::downwards_check_deps(Instruction* instr, Temp* rar_dep)
|
||||||
{
|
{
|
||||||
for (const Definition& def : instr->definitions) {
|
for (const Definition& def : instr->definitions) {
|
||||||
if (def.isTemp() && depends_on[def.tempId()])
|
if (def.isTemp() && depends_on[def.tempId()])
|
||||||
|
|
@ -199,9 +199,13 @@ MoveState::downwards_check_deps(Instruction* instr)
|
||||||
if (!improved_rar && depends_on[op.tempId()])
|
if (!improved_rar && depends_on[op.tempId()])
|
||||||
return move_fail_rar;
|
return move_fail_rar;
|
||||||
|
|
||||||
if (improved_rar && rar_dependencies.count(op.tempId()))
|
if (improved_rar && rar_dependencies.count(op.tempId())) {
|
||||||
// FIXME: account for difference in register pressure
|
/* We allow for exactly one read-after-read dependency. */
|
||||||
return move_fail_rar;
|
if (rar_dep && (*rar_dep == Temp() || *rar_dep == op.getTemp()))
|
||||||
|
*rar_dep = op.getTemp();
|
||||||
|
else
|
||||||
|
return move_fail_rar;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return move_success;
|
return move_success;
|
||||||
|
|
@ -284,10 +288,11 @@ MoveState::downwards_move_clause(DownwardsCursor& cursor)
|
||||||
|
|
||||||
/* Check if one of candidates' operands is killed by depending instruction. */
|
/* Check if one of candidates' operands is killed by depending instruction. */
|
||||||
RegisterDemand max_clause_demand;
|
RegisterDemand max_clause_demand;
|
||||||
|
Temp rar_dep = Temp();
|
||||||
while (should_form_clause(block->instructions[clause_begin_idx].get(), instr)) {
|
while (should_form_clause(block->instructions[clause_begin_idx].get(), instr)) {
|
||||||
Instruction* candidate = block->instructions[clause_begin_idx--].get();
|
Instruction* candidate = block->instructions[clause_begin_idx--].get();
|
||||||
|
|
||||||
MoveResult res = downwards_check_deps(candidate);
|
MoveResult res = downwards_check_deps(candidate, &rar_dep);
|
||||||
if (res != move_success)
|
if (res != move_success)
|
||||||
return res;
|
return res;
|
||||||
|
|
||||||
|
|
@ -306,7 +311,7 @@ MoveState::downwards_move_clause(DownwardsCursor& cursor)
|
||||||
/* RegisterDemand changes caused by the clause. */
|
/* RegisterDemand changes caused by the clause. */
|
||||||
RegisterDemand clause_diff = clause_end_demand - clause_begin_demand;
|
RegisterDemand clause_diff = clause_end_demand - clause_begin_demand;
|
||||||
/* RegisterDemand changes caused by the instructions being moved over. */
|
/* RegisterDemand changes caused by the instructions being moved over. */
|
||||||
RegisterDemand insert_diff = insert_demand - clause_end_demand;
|
RegisterDemand insert_diff = insert_demand - clause_end_demand + rar_dep;
|
||||||
|
|
||||||
/* Check the new demand of the instructions being moved over. */
|
/* Check the new demand of the instructions being moved over. */
|
||||||
if (RegisterDemand(cursor.total_demand - clause_diff).exceeds(max_registers))
|
if (RegisterDemand(cursor.total_demand - clause_diff).exceeds(max_registers))
|
||||||
|
|
@ -316,11 +321,53 @@ MoveState::downwards_move_clause(DownwardsCursor& cursor)
|
||||||
if (RegisterDemand(max_clause_demand + insert_diff).exceeds(max_registers))
|
if (RegisterDemand(max_clause_demand + insert_diff).exceeds(max_registers))
|
||||||
return move_fail_pressure;
|
return move_fail_pressure;
|
||||||
|
|
||||||
|
/* Update kill flags if we move over a RAR dependency:
|
||||||
|
* The changed kill flags also affect the temp register demand, so re-calculate
|
||||||
|
* that as well.
|
||||||
|
*/
|
||||||
|
int rar_index = insert_idx;
|
||||||
|
if (rar_dep != Temp()) {
|
||||||
|
for (int i = clause_end_idx; i > clause_begin_idx; i--) {
|
||||||
|
/* Subtract the RAR temp from any clause instruction after the kill. */
|
||||||
|
instr = block->instructions[i].get();
|
||||||
|
instr->register_demand -= rar_dep;
|
||||||
|
|
||||||
|
bool first = true;
|
||||||
|
for (Operand& op : instr->operands) {
|
||||||
|
if (op.isTemp() && op.getTemp() == rar_dep) {
|
||||||
|
if (first)
|
||||||
|
instr->register_demand -= get_temp_registers(instr);
|
||||||
|
op.setKill(true);
|
||||||
|
op.setFirstKill(first);
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (first == false) {
|
||||||
|
instr->register_demand += get_temp_registers(instr);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rar_index = cursor.insert_idx + rar_dependencies[rar_dep.id()];
|
||||||
|
Instruction* rar_instr = block->instructions[rar_index].get();
|
||||||
|
rar_instr->register_demand -= get_temp_registers(rar_instr);
|
||||||
|
for (Operand& op : rar_instr->operands) {
|
||||||
|
if (op.isTemp() && op.getTemp() == rar_dep && !op.isCopyKill())
|
||||||
|
op.setKill(false);
|
||||||
|
}
|
||||||
|
rar_instr->register_demand += get_temp_registers(rar_instr) + rar_dep;
|
||||||
|
}
|
||||||
|
|
||||||
/* Update register demand. */
|
/* Update register demand. */
|
||||||
for (int i = clause_begin_idx + 1; i <= clause_end_idx; i++)
|
for (int i = clause_begin_idx + 1; i <= clause_end_idx; i++)
|
||||||
block->instructions[i]->register_demand += insert_diff;
|
block->instructions[i]->register_demand += insert_diff;
|
||||||
for (int i = clause_end_idx + 1; i <= insert_idx; i++)
|
for (int i = clause_end_idx + 1; i <= rar_index; i++)
|
||||||
block->instructions[i]->register_demand -= clause_diff;
|
block->instructions[i]->register_demand -= clause_diff;
|
||||||
|
for (int i = rar_index + 1; i <= insert_idx; i++) {
|
||||||
|
/* Add the RAR temp to instructions after the original kill. */
|
||||||
|
block->instructions[i]->register_demand -= clause_diff;
|
||||||
|
block->instructions[i]->register_demand += rar_dep;
|
||||||
|
}
|
||||||
|
|
||||||
/* Move the clause before the memory instruction. */
|
/* Move the clause before the memory instruction. */
|
||||||
move_element(block->instructions.begin(), clause_begin_idx + 1, cursor.insert_idx_clause,
|
move_element(block->instructions.begin(), clause_begin_idx + 1, cursor.insert_idx_clause,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue