aco: schedule LDSDIR instructions

fossil-db (navi31):
Totals from 33850 (42.63% of 79395) affected shaders:
MaxWaves: 1011236 -> 1011204 (-0.00%)
Instrs: 23589117 -> 23559185 (-0.13%); split: -0.21%, +0.08%
CodeSize: 126099716 -> 125968376 (-0.10%); split: -0.17%, +0.07%
VGPRs: 1348632 -> 1356012 (+0.55%); split: -0.09%, +0.63%
Latency: 183233795 -> 180997751 (-1.22%); split: -1.33%, +0.11%
InvThroughput: 27081576 -> 27056383 (-0.09%); split: -0.15%, +0.06%
VClause: 386453 -> 386551 (+0.03%); split: -0.11%, +0.13%
SClause: 811941 -> 813023 (+0.13%); split: -0.38%, +0.52%
Copies: 1279706 -> 1280051 (+0.03%); split: -0.46%, +0.49%
Branches: 416940 -> 416938 (-0.00%); split: -0.02%, +0.02%
VALU: 13566410 -> 13567367 (+0.01%); split: -0.04%, +0.04%
SALU: 1835804 -> 1835652 (-0.01%); split: -0.02%, +0.01%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11013
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
This commit is contained in:
Rhys Perry 2024-04-15 11:42:14 +01:00 committed by Marge Bot
parent 0bc8a9be67
commit 0ee4fa33bc

View file

@ -15,9 +15,11 @@
/* Scheduler tuning constants.
 *
 * The SMEM/VMEM values are expressions over ctx.num_waves, so they can only be expanded where a
 * `ctx` with a num_waves member is in scope; all of them shrink as num_waves grows.
 */
/* window sizes */
#define SMEM_WINDOW_SIZE (350 - ctx.num_waves * 35)
#define VMEM_WINDOW_SIZE (1024 - ctx.num_waves * 64)
/* schedule_LDS() uses this as the limit on candidates visited in each direction */
#define LDS_WINDOW_SIZE 64
/* NOTE(review): presumably consumed by schedule_position_export(); its body is not visible here */
#define POS_EXP_WINDOW_SIZE 512
/* move limits */
#define SMEM_MAX_MOVES (64 - ctx.num_waves * 4)
#define VMEM_MAX_MOVES (256 - ctx.num_waves * 16)
/* schedule_LDS() uses this as the limit on successful moves per scheduled instruction */
#define LDSDIR_MAX_MOVES 10
/* creating clauses decreases def-use distances, so make it less aggressive the lower num_waves is */
#define VMEM_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 2)
#define VMEM_STORE_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 4)
@ -979,6 +981,85 @@ schedule_VMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
}
}
/* Schedules other instructions around `current`, which sits at index `idx` of `block`.
 *
 * Two passes are made, sharing one budget of LDSDIR_MAX_MOVES successful moves:
 *  1. walk backwards from `current` and move earlier instructions below it;
 *  2. walk forwards from `current`, locate the first instruction that depends on it, and then
 *     move later instructions up in front of that dependent instruction.
 * Each pass visits at most LDS_WINDOW_SIZE candidates and stops at the logical start/end marker
 * of the block or at any VMEM, FLAT-like or SMEM instruction.
 *
 * `register_demand` is not read by this function.
 */
void
schedule_LDS(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& register_demand,
             Instruction* current, int idx)
{
   assert(idx != 0);

   const int visit_limit = LDS_WINDOW_SIZE;
   const int move_limit = LDSDIR_MAX_MOVES;
   int16_t num_moved = 0;

   /* Memory instructions of these kinds end the search in either direction. */
   auto is_mem = [](const aco_ptr<Instruction>& instr) -> bool
   { return instr->isVMEM() || instr->isFlatLike() || instr->isSMEM(); };

   /* Pass 1: move instructions preceding current downwards. */
   hazard_query hq;
   init_hazard_query(ctx, &hq);
   add_to_hazard_query(&hq, current);

   DownwardsCursor down_cursor = ctx.mv.downwards_init(idx, true, false);

   for (int visited = 0; visited < visit_limit && num_moved < move_limit; visited++) {
      aco_ptr<Instruction>& instr = block->instructions[down_cursor.source_idx];

      if (instr->opcode == aco_opcode::p_logical_start || is_mem(instr))
         break;

      /* DS/LDSDIR instructions are never moved here: record them in the query and step over. */
      const bool is_lds_access = instr->isDS() || instr->isLDSDIR();
      if (is_lds_access) {
         add_to_hazard_query(&hq, instr.get());
         ctx.mv.downwards_skip(down_cursor);
         continue;
      }

      /* The first candidate that cannot be moved ends the downwards pass. */
      if (perform_hazard_query(&hq, instr.get(), false) != hazard_success)
         break;
      if (ctx.mv.downwards_move(down_cursor, false) != move_success)
         break;

      num_moved++;
   }

   /* Pass 2: move instructions following current upwards. */
   UpwardsCursor up_cursor = ctx.mv.upwards_init(idx + 1, true);
   int visited = 0;
   bool have_dependent = false;

   /* Skip forward until the first instruction that depends on current is found. */
   while (!have_dependent && num_moved < move_limit && visited < visit_limit) {
      aco_ptr<Instruction>& instr = block->instructions[up_cursor.source_idx];

      if (instr->opcode == aco_opcode::p_logical_end || is_mem(instr))
         break;

      if (!ctx.mv.upwards_check_deps(up_cursor)) {
         /* The hazard query is restarted from this dependent instruction, and the cursor's
          * insert index is updated before the instruction itself is skipped.
          */
         init_hazard_query(ctx, &hq);
         add_to_hazard_query(&hq, instr.get());
         ctx.mv.upwards_update_insert_idx(up_cursor);
         have_dependent = true;
      }

      ctx.mv.upwards_skip(up_cursor);
      visited++;
   }

   /* Nothing is moved up unless a dependent instruction was found. */
   if (!have_dependent)
      return;

   for (; num_moved < move_limit && visited < visit_limit; visited++) {
      aco_ptr<Instruction>& instr = block->instructions[up_cursor.source_idx];

      if (instr->opcode == aco_opcode::p_logical_end || is_mem(instr))
         break;

      const HazardResult haz = perform_hazard_query(&hq, instr.get(), true);
      if (haz == hazard_fail_exec || haz == hazard_fail_unreorderable)
         break;

      /* Only attempt the move when the hazard query passed. */
      const bool moved = haz == hazard_success && ctx.mv.upwards_move(up_cursor) == move_success;
      if (moved) {
         num_moved++;
         continue;
      }

      /* Candidate stays where it is: later candidates must be checked against it too. */
      add_to_hazard_query(&hq, instr.get());
      ctx.mv.upwards_skip(up_cursor);
   }
}
void
schedule_position_export(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& register_demand,
Instruction* current, int idx)
@ -1096,6 +1177,11 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block, live& live_vars)
ctx.mv.current = current;
schedule_SMEM(ctx, block, live_vars.register_demand[block->index], current, idx);
}
if (current->isLDSDIR()) {
ctx.mv.current = current;
schedule_LDS(ctx, block, live_vars.register_demand[block->index], current, idx);
}
}
/* GFX11 benefits from creating VMEM store clauses. */