From 7e1faf9349b04d05deb8900deefa4014e1ccaa5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?=
Date: Wed, 19 May 2021 11:25:18 +0200
Subject: [PATCH] aco: make clause-forming depend on the number of moved instructions

This allows more aggressive clause-forming in presence of larger def-use
distances. To compensate for the effect, VMEM_CLAUSE_MAX_GRAB_DIST was
decreased.

Totals from 5788 (3.85% of 150170) affected shaders: (GFX10.3)
VGPRs: 483960 -> 475272 (-1.80%); split: -1.82%, +0.02%
CodeSize: 59661240 -> 59669084 (+0.01%); split: -0.01%, +0.02%
MaxWaves: 70408 -> 71450 (+1.48%); split: +1.51%, -0.03%
Instrs: 11222417 -> 11224479 (+0.02%); split: -0.01%, +0.03%
Latency: 349397104 -> 349298602 (-0.03%); split: -0.03%, +0.00%
InvThroughput: 88584832 -> 87762262 (-0.93%); split: -0.93%, +0.00%
VClause: 168905 -> 177089 (+4.85%); split: -0.48%, +5.32%
SClause: 375795 -> 375767 (-0.01%); split: -0.01%, +0.01%
Copies: 840298 -> 840231 (-0.01%); split: -0.04%, +0.03%
Branches: 373265 -> 373278 (+0.00%); split: -0.00%, +0.00%

Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/aco_scheduler.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp
index ebf5fedfdd7..42b830826a4 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -37,7 +37,7 @@
 #define SMEM_MAX_MOVES (64 - ctx.num_waves * 4)
 #define VMEM_MAX_MOVES (256 - ctx.num_waves * 16)
 /* creating clauses decreases def-use distances, so make it less aggressive the lower num_waves is */
-#define VMEM_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 8)
+#define VMEM_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 4)
 #define POS_EXP_MAX_MOVES 512
 
 namespace aco {
@@ -822,7 +822,7 @@ schedule_VMEM(sched_ctx& ctx, Block* block, std::vector& registe
          /* We can't easily tell how much this will decrease the def-to-use
           * distances, so just use how far it will be moved as a heuristic. */
          part_of_clause =
-            grab_dist < clause_max_grab_dist && should_form_clause(current, candidate.get());
+            grab_dist < clause_max_grab_dist + k && should_form_clause(current, candidate.get());
       }
 
       /* if current depends on candidate, add additional dependencies and continue */