aco/gfx11: schedule for VMEM store clauses

fossil-db (gfx1100):
Totals from 49486 (37.09% of 133428) affected shaders:
Instrs: 18376819 -> 18480712 (+0.57%); split: -0.00%, +0.57%
CodeSize: 91810836 -> 92227292 (+0.45%); split: -0.00%, +0.45%
VGPRs: 2031824 -> 2047784 (+0.79%); split: -0.02%, +0.81%
Latency: 104259318 -> 103804792 (-0.44%); split: -0.44%, +0.00%
InvThroughput: 16388760 -> 16399819 (+0.07%); split: -0.13%, +0.19%
VClause: 568844 -> 432401 (-23.99%)
Copies: 1197942 -> 1231202 (+2.78%); split: -0.08%, +2.86%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23505>
This commit is contained in:
Rhys Perry 2023-06-07 17:08:10 +01:00 committed by Marge Bot
parent f837fec213
commit f1f01aaef5

View file

@ -1019,6 +1019,37 @@ schedule_position_export(sched_ctx& ctx, Block* block, std::vector<RegisterDeman
}
}
/* Walks a downwards cursor that starts at `idx` and tries to move
 * instructions that can form a clause with `current` next to it.
 *
 * For each instruction found at the cursor's source position:
 *  - the walk stops at p_logical_start;
 *  - if should_form_clause() rejects it, it is recorded in the hazard query
 *    and the cursor skips over it;
 *  - otherwise it is moved, provided the hazard query and the move itself
 *    both succeed; the first failure ends the walk.
 *
 * At most VMEM_CLAUSE_MAX_GRAB_DIST instructions are examined, counting both
 * skipped and moved ones.
 *
 * \param ctx              scheduler context providing the move state (ctx.mv)
 * \param block            block whose instruction list is being walked
 * \param register_demand  not used by this function
 * \param current          instruction the candidates are tested against
 * \param idx              index handed to downwards_init()
 *                         NOTE(review): presumably the index of `current` in
 *                         block->instructions -- confirm against the caller.
 * \return number of instructions that were successfully moved
 */
unsigned
schedule_VMEM_store(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& register_demand,
                    Instruction* current, int idx)
{
   hazard_query query;
   init_hazard_query(ctx, &query);

   DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, true);
   unsigned num_moved = 0;

   for (int visited = 0; visited < VMEM_CLAUSE_MAX_GRAB_DIST; ++visited) {
      Instruction* const instr = block->instructions[cursor.source_idx].get();

      if (instr->opcode == aco_opcode::p_logical_start)
         break;

      if (should_form_clause(current, instr)) {
         /* The hazard check must pass before the move is attempted. */
         if (perform_hazard_query(&query, instr, false) != hazard_success)
            break;
         if (ctx.mv.downwards_move(cursor, true) != move_success)
            break;
         ++num_moved;
      } else {
         /* Not clause material: remember it for later hazard checks and
          * step the cursor past it. */
         add_to_hazard_query(&query, instr);
         ctx.mv.downwards_skip(cursor);
      }
   }

   return num_moved;
}
void
schedule_block(sched_ctx& ctx, Program* program, Block* block, live& live_vars)
{
@ -1028,6 +1059,7 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block, live& live_vars)
ctx.mv.register_demand = live_vars.register_demand[block->index].data();
/* go through all instructions and find memory loads */
unsigned num_stores = 0;
for (unsigned idx = 0; idx < block->instructions.size(); idx++) {
Instruction* current = block->instructions[idx].get();
@ -1040,8 +1072,10 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block, live& live_vars)
}
}
if (current->definitions.empty())
if (current->definitions.empty()) {
num_stores += current->isVMEM() || current->isFlatLike() ? 1 : 0;
continue;
}
if (current->isVMEM() || current->isFlatLike()) {
ctx.mv.current = current;
@ -1054,6 +1088,19 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block, live& live_vars)
}
}
/* GFX11 benefits from creating VMEM store clauses. */
if (num_stores > 1 && program->gfx_level >= GFX11) {
for (int idx = block->instructions.size() - 1; idx >= 0; idx--) {
Instruction* current = block->instructions[idx].get();
if (!current->definitions.empty() || !(current->isVMEM() || current->isFlatLike()))
continue;
ctx.mv.current = current;
idx -=
schedule_VMEM_store(ctx, block, live_vars.register_demand[block->index], current, idx);
}
}
/* resummarize the block's register demand */
block->register_demand = RegisterDemand();
for (unsigned idx = 0; idx < block->instructions.size(); idx++) {