From f1f01aaef53cd6a9593dade1e2fedcacbc2e45c2 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 7 Jun 2023 17:08:10 +0100 Subject: [PATCH] aco/gfx11: schedule for VMEM store clauses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fossil-db (gfx1100): Totals from 49486 (37.09% of 133428) affected shaders: Instrs: 18376819 -> 18480712 (+0.57%); split: -0.00%, +0.57% CodeSize: 91810836 -> 92227292 (+0.45%); split: -0.00%, +0.45% VGPRs: 2031824 -> 2047784 (+0.79%); split: -0.02%, +0.81% Latency: 104259318 -> 103804792 (-0.44%); split: -0.44%, +0.00% InvThroughput: 16388760 -> 16399819 (+0.07%); split: -0.13%, +0.19% VClause: 568844 -> 432401 (-23.99%) Copies: 1197942 -> 1231202 (+2.78%); split: -0.08%, +2.86% Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_scheduler.cpp | 49 +++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index 02498c01195..2e43fa2d62a 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -1019,6 +1019,37 @@ schedule_position_export(sched_ctx& ctx, Block* block, std::vector& register_demand, + Instruction* current, int idx) +{ + hazard_query hq; + init_hazard_query(ctx, &hq); + + DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, true); + unsigned skip = 0; + + for (int i = 0; i < VMEM_CLAUSE_MAX_GRAB_DIST; i++) { + aco_ptr& candidate = block->instructions[cursor.source_idx]; + if (candidate->opcode == aco_opcode::p_logical_start) + break; + + if (!should_form_clause(current, candidate.get())) { + add_to_hazard_query(&hq, candidate.get()); + ctx.mv.downwards_skip(cursor); + continue; + } + + if (perform_hazard_query(&hq, candidate.get(), false) != hazard_success || + ctx.mv.downwards_move(cursor, true) != move_success) + break; + + skip++; + } + + return skip; +} + void schedule_block(sched_ctx& ctx, Program* program, Block* block, live& live_vars) { @@ -1028,6 +1059,7 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block, live& live_vars) ctx.mv.register_demand = live_vars.register_demand[block->index].data(); /* go through all instructions and find memory loads */ + unsigned num_stores = 0; for (unsigned idx = 0; idx < block->instructions.size(); idx++) { Instruction* current = block->instructions[idx].get(); @@ -1040,8 +1072,10 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block, live& live_vars) } } - if (current->definitions.empty()) + if (current->definitions.empty()) { + num_stores += current->isVMEM() || current->isFlatLike() ? 1 : 0; continue; + } if (current->isVMEM() || current->isFlatLike()) { ctx.mv.current = current; @@ -1054,6 +1088,19 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block, live& live_vars) } } + /* GFX11 benefits from creating VMEM store clauses. */ + if (num_stores > 1 && program->gfx_level >= GFX11) { + for (int idx = block->instructions.size() - 1; idx >= 0; idx--) { + Instruction* current = block->instructions[idx].get(); + if (!current->definitions.empty() || !(current->isVMEM() || current->isFlatLike())) + continue; + + ctx.mv.current = current; + idx -= + schedule_VMEM_store(ctx, block, live_vars.register_demand[block->index], current, idx); + } + } + /* resummarize the block's register demand */ block->register_demand = RegisterDemand(); for (unsigned idx = 0; idx < block->instructions.size(); idx++) {