From 88e03feb279563f5aaedc26dcc80e7aea91ec7cc Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Thu, 18 Apr 2024 14:55:20 +0100
Subject: [PATCH] aco: schedule LDS instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fossil-db (navi31):
Totals from 1823 (2.30% of 79395) affected shaders:
MaxWaves: 53845 -> 53827 (-0.03%); split: +0.02%, -0.05%
Instrs: 1736317 -> 1731200 (-0.29%); split: -0.38%, +0.09%
CodeSize: 8876760 -> 8857908 (-0.21%); split: -0.29%, +0.08%
VGPRs: 91688 -> 92276 (+0.64%); split: -0.03%, +0.67%
Latency: 11743095 -> 11698872 (-0.38%); split: -0.42%, +0.04%
InvThroughput: 2070526 -> 2067440 (-0.15%); split: -0.17%, +0.02%
VClause: 39048 -> 39058 (+0.03%); split: -0.01%, +0.03%
SClause: 35371 -> 35406 (+0.10%); split: -0.02%, +0.12%
Copies: 104335 -> 104384 (+0.05%); split: -0.21%, +0.26%
Branches: 29769 -> 29794 (+0.08%); split: -0.00%, +0.09%
VALU: 970925 -> 970974 (+0.01%); split: -0.01%, +0.02%
SALU: 146222 -> 146345 (+0.08%); split: -0.01%, +0.09%
VOPD: 1119 -> 1162 (+3.84%); split: +4.29%, -0.45%

fossil-db (navi21):
Totals from 37078 (46.70% of 79395) affected shaders:
MaxWaves: 990093 -> 990025 (-0.01%)
Instrs: 21130662 -> 21182543 (+0.25%); split: -0.01%, +0.26%
CodeSize: 110205364 -> 110415032 (+0.19%); split: -0.01%, +0.20%
VGPRs: 1407168 -> 1410768 (+0.26%)
Latency: 90024839 -> 89929196 (-0.11%); split: -0.11%, +0.01%
InvThroughput: 17170356 -> 17167412 (-0.02%); split: -0.02%, +0.00%
VClause: 392830 -> 392825 (-0.00%); split: -0.01%, +0.01%
SClause: 463150 -> 463188 (+0.01%); split: -0.00%, +0.01%
Copies: 1768433 -> 1768483 (+0.00%); split: -0.02%, +0.02%
Branches: 605989 -> 606011 (+0.00%); split: -0.00%, +0.00%
VALU: 11614810 -> 11614912 (+0.00%); split: -0.00%, +0.00%
SALU: 3794531 -> 3794655 (+0.00%); split: -0.00%, +0.00%

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_scheduler.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp
index 40b0f838d31..004d442b6f9 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -20,6 +20,7 @@
 #define SMEM_MAX_MOVES (64 - ctx.num_waves * 4)
 #define VMEM_MAX_MOVES (256 - ctx.num_waves * 16)
 #define LDSDIR_MAX_MOVES 10
+#define LDS_MAX_MOVES 32
 /* creating clauses decreases def-use distances, so make it less aggressive the lower num_waves is */
 #define VMEM_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 2)
 #define VMEM_STORE_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 4)
@@ -987,7 +988,7 @@ schedule_LDS(sched_ctx& ctx, Block* block, std::vector& register
 {
    assert(idx != 0);
    int window_size = LDS_WINDOW_SIZE;
-   int max_moves = LDSDIR_MAX_MOVES;
+   int max_moves = current->isLDSDIR() ? LDSDIR_MAX_MOVES : LDS_MAX_MOVES;
    int16_t k = 0;
 
    /* first, check if we have instructions before current to move down */
@@ -1178,7 +1179,7 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block, live& live_vars)
          schedule_SMEM(ctx, block, live_vars.register_demand[block->index], current, idx);
       }
 
-      if (current->isLDSDIR()) {
+      if (current->isLDSDIR() || (current->isDS() && !current->ds().gds)) {
          ctx.mv.current = current;
          schedule_LDS(ctx, block, live_vars.register_demand[block->index], current, idx);
       }