2019-09-17 13:22:17 +02:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2018 Valve Corporation
|
|
|
|
|
*
|
2024-04-08 09:02:30 +02:00
|
|
|
* SPDX-License-Identifier: MIT
|
2019-09-17 13:22:17 +02:00
|
|
|
*/
|
|
|
|
|
|
2019-10-14 17:46:02 +01:00
|
|
|
#include "aco_builder.h"
|
2021-06-09 15:40:03 +02:00
|
|
|
#include "aco_ir.h"
|
2021-06-10 11:33:15 +02:00
|
|
|
|
|
|
|
|
#include "common/amdgfxregs.h"
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2021-06-09 15:40:03 +02:00
|
|
|
#include <algorithm>
|
|
|
|
|
#include <vector>
|
|
|
|
|
|
aco/scheduler: improve scheduling heuristic
The heuristic we are currently using still stems from the GCN era
with the only adjustments being made for RDNA was to double (or triple)
the wave count.
This rewrite aims to detangle some concepts and provide more consistent results.
- wave_factor: The purpose of this value is to reflect that RDNA SIMDs can
accommodate twice as many waves as GCN SIMDs.
- reg_file_multiple: This value accounts for the larger register file of wave32
and some RDNA3 families.
- wave_minimum: Below this value, we don't sacrifice any waves. It corresponds
to a register demand of 64 VGPRs in wave64.
- occupancy_factor: Depending on target_waves and wave_factor, this controls
the scheduling window sizes and number of moves.
The main differences from the previous heuristic is a lower wave minimum and
a slightly less aggressive reduction of waves.
It also increases SMEM_MAX_MOVES in order to mitigate some of the changes
from targeting less waves.
Totals from 62777 (78.63% of 79839) affected shaders: (Navi48)
MaxWaves: 1880983 -> 1848028 (-1.75%); split: +0.01%, -1.76%
Instrs: 40904711 -> 40800797 (-0.25%); split: -0.39%, +0.14%
CodeSize: 217132208 -> 216748832 (-0.18%); split: -0.29%, +0.12%
VGPRs: 3019304 -> 3099596 (+2.66%); split: -0.11%, +2.77%
Latency: 268857129 -> 265951122 (-1.08%); split: -1.33%, +0.25%
InvThroughput: 40960938 -> 41044533 (+0.20%); split: -0.18%, +0.39%
VClause: 794000 -> 782913 (-1.40%); split: -2.24%, +0.84%
SClause: 1192476 -> 1150831 (-3.49%); split: -3.94%, +0.45%
Copies: 2720470 -> 2700148 (-0.75%); split: -1.84%, +1.09%
Branches: 785926 -> 785951 (+0.00%); split: -0.01%, +0.01%
VALU: 22918411 -> 22890189 (-0.12%); split: -0.19%, +0.06%
SALU: 5281201 -> 5289486 (+0.16%); split: -0.21%, +0.36%
VOPD: 8790 -> 8685 (-1.19%); split: +1.08%, -2.28%
Totals from 62081 (77.77% of 79825) affected shaders: (Navi31)
MaxWaves: 1848555 -> 1812347 (-1.96%); split: +0.01%, -1.97%
Instrs: 39794460 -> 39704180 (-0.23%); split: -0.39%, +0.16%
CodeSize: 208987052 -> 208621524 (-0.17%); split: -0.31%, +0.13%
VGPRs: 3046284 -> 3135156 (+2.92%); split: -0.11%, +3.03%
Latency: 268863465 -> 265218186 (-1.36%); split: -1.59%, +0.23%
InvThroughput: 41101515 -> 41167075 (+0.16%); split: -0.22%, +0.38%
VClause: 795316 -> 774899 (-2.57%); split: -3.17%, +0.61%
SClause: 1177294 -> 1135451 (-3.55%); split: -4.06%, +0.51%
Copies: 2743254 -> 2725127 (-0.66%); split: -1.90%, +1.24%
Branches: 801395 -> 801428 (+0.00%); split: -0.01%, +0.02%
VALU: 23898938 -> 23871294 (-0.12%); split: -0.20%, +0.08%
SALU: 3908807 -> 3919130 (+0.26%); split: -0.23%, +0.50%
VOPD: 8529 -> 8500 (-0.34%); split: +1.29%, -1.63%
Totals from 44996 (71.01% of 63370) affected shaders: (Vega10)
MaxWaves: 307074 -> 304808 (-0.74%); split: +0.63%, -1.37%
Instrs: 22743534 -> 22716240 (-0.12%); split: -0.22%, +0.10%
CodeSize: 117284856 -> 117173212 (-0.10%); split: -0.19%, +0.09%
SGPRs: 3249008 -> 3330480 (+2.51%); split: -0.36%, +2.87%
VGPRs: 1901400 -> 1943880 (+2.23%); split: -0.60%, +2.83%
Latency: 224839126 -> 222878477 (-0.87%); split: -1.19%, +0.31%
InvThroughput: 114389570 -> 114316559 (-0.06%); split: -0.17%, +0.11%
VClause: 482012 -> 473304 (-1.81%); split: -2.86%, +1.05%
SClause: 757799 -> 717092 (-5.37%); split: -5.64%, +0.27%
Copies: 2182735 -> 2183598 (+0.04%); split: -1.17%, +1.21%
Branches: 396026 -> 395996 (-0.01%); split: -0.03%, +0.02%
VALU: 16740283 -> 16728098 (-0.07%); split: -0.14%, +0.07%
SALU: 2133575 -> 2145863 (+0.58%); split: -0.29%, +0.86%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30720>
2024-07-24 11:51:31 +02:00
|
|
|
#define SMEM_WINDOW_SIZE (256 - ctx.occupancy_factor * 16)
|
|
|
|
|
#define VMEM_WINDOW_SIZE (1024 - ctx.occupancy_factor * 64)
|
aco: schedule LDSDIR instructions
fossil-db (navi31):
Totals from 33850 (42.63% of 79395) affected shaders:
MaxWaves: 1011236 -> 1011204 (-0.00%)
Instrs: 23589117 -> 23559185 (-0.13%); split: -0.21%, +0.08%
CodeSize: 126099716 -> 125968376 (-0.10%); split: -0.17%, +0.07%
VGPRs: 1348632 -> 1356012 (+0.55%); split: -0.09%, +0.63%
Latency: 183233795 -> 180997751 (-1.22%); split: -1.33%, +0.11%
InvThroughput: 27081576 -> 27056383 (-0.09%); split: -0.15%, +0.06%
VClause: 386453 -> 386551 (+0.03%); split: -0.11%, +0.13%
SClause: 811941 -> 813023 (+0.13%); split: -0.38%, +0.52%
Copies: 1279706 -> 1280051 (+0.03%); split: -0.46%, +0.49%
Branches: 416940 -> 416938 (-0.00%); split: -0.02%, +0.02%
VALU: 13566410 -> 13567367 (+0.01%); split: -0.04%, +0.04%
SALU: 1835804 -> 1835652 (-0.01%); split: -0.02%, +0.01%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11013
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
2024-04-15 11:42:14 +01:00
|
|
|
#define LDS_WINDOW_SIZE 64
|
2019-09-17 13:22:17 +02:00
|
|
|
#define POS_EXP_WINDOW_SIZE 512
|
aco/scheduler: improve scheduling heuristic
The heuristic we are currently using still stems from the GCN era
with the only adjustments being made for RDNA was to double (or triple)
the wave count.
This rewrite aims to detangle some concepts and provide more consistent results.
- wave_factor: The purpose of this value is to reflect that RDNA SIMDs can
accommodate twice as many waves as GCN SIMDs.
- reg_file_multiple: This value accounts for the larger register file of wave32
and some RDNA3 families.
- wave_minimum: Below this value, we don't sacrifice any waves. It corresponds
to a register demand of 64 VGPRs in wave64.
- occupancy_factor: Depending on target_waves and wave_factor, this controls
the scheduling window sizes and number of moves.
The main differences from the previous heuristic is a lower wave minimum and
a slightly less aggressive reduction of waves.
It also increases SMEM_MAX_MOVES in order to mitigate some of the changes
from targeting less waves.
Totals from 62777 (78.63% of 79839) affected shaders: (Navi48)
MaxWaves: 1880983 -> 1848028 (-1.75%); split: +0.01%, -1.76%
Instrs: 40904711 -> 40800797 (-0.25%); split: -0.39%, +0.14%
CodeSize: 217132208 -> 216748832 (-0.18%); split: -0.29%, +0.12%
VGPRs: 3019304 -> 3099596 (+2.66%); split: -0.11%, +2.77%
Latency: 268857129 -> 265951122 (-1.08%); split: -1.33%, +0.25%
InvThroughput: 40960938 -> 41044533 (+0.20%); split: -0.18%, +0.39%
VClause: 794000 -> 782913 (-1.40%); split: -2.24%, +0.84%
SClause: 1192476 -> 1150831 (-3.49%); split: -3.94%, +0.45%
Copies: 2720470 -> 2700148 (-0.75%); split: -1.84%, +1.09%
Branches: 785926 -> 785951 (+0.00%); split: -0.01%, +0.01%
VALU: 22918411 -> 22890189 (-0.12%); split: -0.19%, +0.06%
SALU: 5281201 -> 5289486 (+0.16%); split: -0.21%, +0.36%
VOPD: 8790 -> 8685 (-1.19%); split: +1.08%, -2.28%
Totals from 62081 (77.77% of 79825) affected shaders: (Navi31)
MaxWaves: 1848555 -> 1812347 (-1.96%); split: +0.01%, -1.97%
Instrs: 39794460 -> 39704180 (-0.23%); split: -0.39%, +0.16%
CodeSize: 208987052 -> 208621524 (-0.17%); split: -0.31%, +0.13%
VGPRs: 3046284 -> 3135156 (+2.92%); split: -0.11%, +3.03%
Latency: 268863465 -> 265218186 (-1.36%); split: -1.59%, +0.23%
InvThroughput: 41101515 -> 41167075 (+0.16%); split: -0.22%, +0.38%
VClause: 795316 -> 774899 (-2.57%); split: -3.17%, +0.61%
SClause: 1177294 -> 1135451 (-3.55%); split: -4.06%, +0.51%
Copies: 2743254 -> 2725127 (-0.66%); split: -1.90%, +1.24%
Branches: 801395 -> 801428 (+0.00%); split: -0.01%, +0.02%
VALU: 23898938 -> 23871294 (-0.12%); split: -0.20%, +0.08%
SALU: 3908807 -> 3919130 (+0.26%); split: -0.23%, +0.50%
VOPD: 8529 -> 8500 (-0.34%); split: +1.29%, -1.63%
Totals from 44996 (71.01% of 63370) affected shaders: (Vega10)
MaxWaves: 307074 -> 304808 (-0.74%); split: +0.63%, -1.37%
Instrs: 22743534 -> 22716240 (-0.12%); split: -0.22%, +0.10%
CodeSize: 117284856 -> 117173212 (-0.10%); split: -0.19%, +0.09%
SGPRs: 3249008 -> 3330480 (+2.51%); split: -0.36%, +2.87%
VGPRs: 1901400 -> 1943880 (+2.23%); split: -0.60%, +2.83%
Latency: 224839126 -> 222878477 (-0.87%); split: -1.19%, +0.31%
InvThroughput: 114389570 -> 114316559 (-0.06%); split: -0.17%, +0.11%
VClause: 482012 -> 473304 (-1.81%); split: -2.86%, +1.05%
SClause: 757799 -> 717092 (-5.37%); split: -5.64%, +0.27%
Copies: 2182735 -> 2183598 (+0.04%); split: -1.17%, +1.21%
Branches: 396026 -> 395996 (-0.01%); split: -0.03%, +0.02%
VALU: 16740283 -> 16728098 (-0.07%); split: -0.14%, +0.07%
SALU: 2133575 -> 2145863 (+0.58%); split: -0.29%, +0.86%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30720>
2024-07-24 11:51:31 +02:00
|
|
|
#define SMEM_MAX_MOVES (128 - ctx.occupancy_factor * 8)
|
|
|
|
|
#define VMEM_MAX_MOVES (256 - ctx.occupancy_factor * 16)
|
aco: schedule LDSDIR instructions
fossil-db (navi31):
Totals from 33850 (42.63% of 79395) affected shaders:
MaxWaves: 1011236 -> 1011204 (-0.00%)
Instrs: 23589117 -> 23559185 (-0.13%); split: -0.21%, +0.08%
CodeSize: 126099716 -> 125968376 (-0.10%); split: -0.17%, +0.07%
VGPRs: 1348632 -> 1356012 (+0.55%); split: -0.09%, +0.63%
Latency: 183233795 -> 180997751 (-1.22%); split: -1.33%, +0.11%
InvThroughput: 27081576 -> 27056383 (-0.09%); split: -0.15%, +0.06%
VClause: 386453 -> 386551 (+0.03%); split: -0.11%, +0.13%
SClause: 811941 -> 813023 (+0.13%); split: -0.38%, +0.52%
Copies: 1279706 -> 1280051 (+0.03%); split: -0.46%, +0.49%
Branches: 416940 -> 416938 (-0.00%); split: -0.02%, +0.02%
VALU: 13566410 -> 13567367 (+0.01%); split: -0.04%, +0.04%
SALU: 1835804 -> 1835652 (-0.01%); split: -0.02%, +0.01%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11013
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
2024-04-15 11:42:14 +01:00
|
|
|
#define LDSDIR_MAX_MOVES 10
|
aco: schedule LDS instructions
fossil-db (navi31):
Totals from 1823 (2.30% of 79395) affected shaders:
MaxWaves: 53845 -> 53827 (-0.03%); split: +0.02%, -0.05%
Instrs: 1736317 -> 1731200 (-0.29%); split: -0.38%, +0.09%
CodeSize: 8876760 -> 8857908 (-0.21%); split: -0.29%, +0.08%
VGPRs: 91688 -> 92276 (+0.64%); split: -0.03%, +0.67%
Latency: 11743095 -> 11698872 (-0.38%); split: -0.42%, +0.04%
InvThroughput: 2070526 -> 2067440 (-0.15%); split: -0.17%, +0.02%
VClause: 39048 -> 39058 (+0.03%); split: -0.01%, +0.03%
SClause: 35371 -> 35406 (+0.10%); split: -0.02%, +0.12%
Copies: 104335 -> 104384 (+0.05%); split: -0.21%, +0.26%
Branches: 29769 -> 29794 (+0.08%); split: -0.00%, +0.09%
VALU: 970925 -> 970974 (+0.01%); split: -0.01%, +0.02%
SALU: 146222 -> 146345 (+0.08%); split: -0.01%, +0.09%
VOPD: 1119 -> 1162 (+3.84%); split: +4.29%, -0.45%
fossil-db (navi21):
Totals from 37078 (46.70% of 79395) affected shaders:
MaxWaves: 990093 -> 990025 (-0.01%)
Instrs: 21130662 -> 21182543 (+0.25%); split: -0.01%, +0.26%
CodeSize: 110205364 -> 110415032 (+0.19%); split: -0.01%, +0.20%
VGPRs: 1407168 -> 1410768 (+0.26%)
Latency: 90024839 -> 89929196 (-0.11%); split: -0.11%, +0.01%
InvThroughput: 17170356 -> 17167412 (-0.02%); split: -0.02%, +0.00%
VClause: 392830 -> 392825 (-0.00%); split: -0.01%, +0.01%
SClause: 463150 -> 463188 (+0.01%); split: -0.00%, +0.01%
Copies: 1768433 -> 1768483 (+0.00%); split: -0.02%, +0.02%
Branches: 605989 -> 606011 (+0.00%); split: -0.00%, +0.00%
VALU: 11614810 -> 11614912 (+0.00%); split: -0.00%, +0.00%
SALU: 3794531 -> 3794655 (+0.00%); split: -0.00%, +0.00%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
2024-04-18 14:55:20 +01:00
|
|
|
#define LDS_MAX_MOVES 32
|
2019-10-18 13:05:00 +01:00
|
|
|
/* creating clauses decreases def-use distances, so make it less aggressive the lower num_waves is */
|
aco/scheduler: improve scheduling heuristic
The heuristic we are currently using still stems from the GCN era
with the only adjustments being made for RDNA was to double (or triple)
the wave count.
This rewrite aims to detangle some concepts and provide more consistent results.
- wave_factor: The purpose of this value is to reflect that RDNA SIMDs can
accommodate twice as many waves as GCN SIMDs.
- reg_file_multiple: This value accounts for the larger register file of wave32
and some RDNA3 families.
- wave_minimum: Below this value, we don't sacrifice any waves. It corresponds
to a register demand of 64 VGPRs in wave64.
- occupancy_factor: Depending on target_waves and wave_factor, this controls
the scheduling window sizes and number of moves.
The main differences from the previous heuristic is a lower wave minimum and
a slightly less aggressive reduction of waves.
It also increases SMEM_MAX_MOVES in order to mitigate some of the changes
from targeting less waves.
Totals from 62777 (78.63% of 79839) affected shaders: (Navi48)
MaxWaves: 1880983 -> 1848028 (-1.75%); split: +0.01%, -1.76%
Instrs: 40904711 -> 40800797 (-0.25%); split: -0.39%, +0.14%
CodeSize: 217132208 -> 216748832 (-0.18%); split: -0.29%, +0.12%
VGPRs: 3019304 -> 3099596 (+2.66%); split: -0.11%, +2.77%
Latency: 268857129 -> 265951122 (-1.08%); split: -1.33%, +0.25%
InvThroughput: 40960938 -> 41044533 (+0.20%); split: -0.18%, +0.39%
VClause: 794000 -> 782913 (-1.40%); split: -2.24%, +0.84%
SClause: 1192476 -> 1150831 (-3.49%); split: -3.94%, +0.45%
Copies: 2720470 -> 2700148 (-0.75%); split: -1.84%, +1.09%
Branches: 785926 -> 785951 (+0.00%); split: -0.01%, +0.01%
VALU: 22918411 -> 22890189 (-0.12%); split: -0.19%, +0.06%
SALU: 5281201 -> 5289486 (+0.16%); split: -0.21%, +0.36%
VOPD: 8790 -> 8685 (-1.19%); split: +1.08%, -2.28%
Totals from 62081 (77.77% of 79825) affected shaders: (Navi31)
MaxWaves: 1848555 -> 1812347 (-1.96%); split: +0.01%, -1.97%
Instrs: 39794460 -> 39704180 (-0.23%); split: -0.39%, +0.16%
CodeSize: 208987052 -> 208621524 (-0.17%); split: -0.31%, +0.13%
VGPRs: 3046284 -> 3135156 (+2.92%); split: -0.11%, +3.03%
Latency: 268863465 -> 265218186 (-1.36%); split: -1.59%, +0.23%
InvThroughput: 41101515 -> 41167075 (+0.16%); split: -0.22%, +0.38%
VClause: 795316 -> 774899 (-2.57%); split: -3.17%, +0.61%
SClause: 1177294 -> 1135451 (-3.55%); split: -4.06%, +0.51%
Copies: 2743254 -> 2725127 (-0.66%); split: -1.90%, +1.24%
Branches: 801395 -> 801428 (+0.00%); split: -0.01%, +0.02%
VALU: 23898938 -> 23871294 (-0.12%); split: -0.20%, +0.08%
SALU: 3908807 -> 3919130 (+0.26%); split: -0.23%, +0.50%
VOPD: 8529 -> 8500 (-0.34%); split: +1.29%, -1.63%
Totals from 44996 (71.01% of 63370) affected shaders: (Vega10)
MaxWaves: 307074 -> 304808 (-0.74%); split: +0.63%, -1.37%
Instrs: 22743534 -> 22716240 (-0.12%); split: -0.22%, +0.10%
CodeSize: 117284856 -> 117173212 (-0.10%); split: -0.19%, +0.09%
SGPRs: 3249008 -> 3330480 (+2.51%); split: -0.36%, +2.87%
VGPRs: 1901400 -> 1943880 (+2.23%); split: -0.60%, +2.83%
Latency: 224839126 -> 222878477 (-0.87%); split: -1.19%, +0.31%
InvThroughput: 114389570 -> 114316559 (-0.06%); split: -0.17%, +0.11%
VClause: 482012 -> 473304 (-1.81%); split: -2.86%, +1.05%
SClause: 757799 -> 717092 (-5.37%); split: -5.64%, +0.27%
Copies: 2182735 -> 2183598 (+0.04%); split: -1.17%, +1.21%
Branches: 396026 -> 395996 (-0.01%); split: -0.03%, +0.02%
VALU: 16740283 -> 16728098 (-0.07%); split: -0.14%, +0.07%
SALU: 2133575 -> 2145863 (+0.58%); split: -0.29%, +0.86%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30720>
2024-07-24 11:51:31 +02:00
|
|
|
#define VMEM_CLAUSE_MAX_GRAB_DIST (ctx.occupancy_factor * 2)
|
|
|
|
|
#define VMEM_STORE_CLAUSE_MAX_GRAB_DIST (ctx.occupancy_factor * 4)
|
2019-09-17 13:22:17 +02:00
|
|
|
#define POS_EXP_MAX_MOVES 512
|
|
|
|
|
|
|
|
|
|
namespace aco {
|
|
|
|
|
|
2024-06-15 16:17:29 +02:00
|
|
|
namespace {
|
|
|
|
|
|
2019-11-06 16:38:57 +00:00
|
|
|
/* Outcome reported by the cursor move helpers of MoveState. */
enum MoveResult {
   /* The candidate was moved and the cursor was advanced. */
   move_success = 0,
   /* The candidate defines or uses a temporary that is marked as a dependency. */
   move_fail_ssa = 1,
   /* NOTE(review): presumably a read-after-read dependency blocked the move;
    * no code visible in this part of the file returns it -- confirm. */
   move_fail_rar = 2,
   /* Moving the candidate would push the register demand above the limit. */
   move_fail_pressure = 3,
};
|
|
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
/**
|
|
|
|
|
* Cursor for downwards moves, where a single instruction is moved towards
|
|
|
|
|
* or below a group of instruction that hardware can execute as a clause.
|
|
|
|
|
*/
|
|
|
|
|
struct DownwardsCursor {
|
|
|
|
|
int source_idx; /* Current instruction to consider for moving */
|
|
|
|
|
|
|
|
|
|
int insert_idx_clause; /* First clause instruction */
|
|
|
|
|
int insert_idx; /* First instruction *after* the clause */
|
|
|
|
|
|
|
|
|
|
/* Maximum demand of instructions from source_idx to insert_idx_clause (both exclusive) */
|
|
|
|
|
RegisterDemand total_demand;
|
2024-04-19 11:43:00 +02:00
|
|
|
/* Register demand immediately before the insert_idx. */
|
|
|
|
|
RegisterDemand insert_demand;
|
2021-06-07 12:02:43 +02:00
|
|
|
|
2025-08-01 11:38:06 +02:00
|
|
|
DownwardsCursor(int current_idx)
|
|
|
|
|
: source_idx(current_idx - 1), insert_idx_clause(current_idx), insert_idx(current_idx + 1)
|
2021-06-07 12:02:43 +02:00
|
|
|
{}
|
|
|
|
|
|
2024-06-21 15:45:22 +02:00
|
|
|
void verify_invariants(const Block* block);
|
2021-06-07 12:02:43 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Cursor for upwards moves, where a single instruction is moved below
|
|
|
|
|
* another instruction.
|
|
|
|
|
*/
|
|
|
|
|
struct UpwardsCursor {
|
|
|
|
|
int source_idx; /* Current instruction to consider for moving */
|
|
|
|
|
int insert_idx; /* Instruction to move in front of */
|
|
|
|
|
|
|
|
|
|
/* Maximum demand of instructions from insert_idx (inclusive) to source_idx (exclusive) */
|
|
|
|
|
RegisterDemand total_demand;
|
2024-04-18 16:58:11 +02:00
|
|
|
/* Register demand immediately before the first use instruction. */
|
|
|
|
|
RegisterDemand insert_demand;
|
2021-06-07 12:02:43 +02:00
|
|
|
|
|
|
|
|
UpwardsCursor(int source_idx_) : source_idx(source_idx_)
|
|
|
|
|
{
|
|
|
|
|
insert_idx = -1; /* to be initialized later */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool has_insert_idx() const { return insert_idx != -1; }
|
2024-06-21 15:45:22 +02:00
|
|
|
void verify_invariants(const Block* block);
|
2021-06-07 12:02:43 +02:00
|
|
|
};
|
|
|
|
|
|
2019-11-06 16:38:57 +00:00
|
|
|
struct MoveState {
|
|
|
|
|
RegisterDemand max_registers;
|
|
|
|
|
|
|
|
|
|
Block* block;
|
|
|
|
|
Instruction* current;
|
|
|
|
|
bool improved_rar;
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
std::vector<bool> depends_on;
|
2019-11-06 16:38:57 +00:00
|
|
|
/* Two are needed because, for downwards VMEM scheduling, one needs to
|
|
|
|
|
* exclude the instructions in the clause, since new instructions in the
|
|
|
|
|
* clause are not moved past any other instructions in the clause. */
|
2019-09-17 13:22:17 +02:00
|
|
|
std::vector<bool> RAR_dependencies;
|
2019-11-06 16:38:57 +00:00
|
|
|
std::vector<bool> RAR_dependencies_clause;
|
|
|
|
|
|
|
|
|
|
/* for moving instructions before the current instruction to after it */
|
2021-06-07 12:02:43 +02:00
|
|
|
DownwardsCursor downwards_init(int current_idx, bool improved_rar, bool may_form_clauses);
|
2025-08-01 11:11:41 +02:00
|
|
|
MoveResult downwards_move(DownwardsCursor&);
|
|
|
|
|
MoveResult downwards_move_clause(DownwardsCursor&);
|
2021-06-07 12:02:43 +02:00
|
|
|
void downwards_skip(DownwardsCursor&);
|
2019-11-06 16:38:57 +00:00
|
|
|
|
|
|
|
|
/* for moving instructions after the first use of the current instruction upwards */
|
2021-06-07 12:02:43 +02:00
|
|
|
UpwardsCursor upwards_init(int source_idx, bool improved_rar);
|
|
|
|
|
bool upwards_check_deps(UpwardsCursor&);
|
|
|
|
|
void upwards_update_insert_idx(UpwardsCursor&);
|
|
|
|
|
MoveResult upwards_move(UpwardsCursor&);
|
|
|
|
|
void upwards_skip(UpwardsCursor&);
|
2019-11-06 16:38:57 +00:00
|
|
|
};
|
2019-10-18 13:05:00 +01:00
|
|
|
|
2019-11-06 16:38:57 +00:00
|
|
|
struct sched_ctx {
|
2022-07-21 15:45:11 +01:00
|
|
|
amd_gfx_level gfx_level;
|
2025-09-09 10:03:59 +01:00
|
|
|
Program* program;
|
aco/scheduler: improve scheduling heuristic
The heuristic we are currently using still stems from the GCN era
with the only adjustments being made for RDNA was to double (or triple)
the wave count.
This rewrite aims to detangle some concepts and provide more consistent results.
- wave_factor: The purpose of this value is to reflect that RDNA SIMDs can
accomodate twice as many waves as GCN SIMDs.
- reg_file_multiple: This value accounts for the larger register file of wave32
and some RDNA3 families.
- wave_minimum: Below this value, we don't sacrifice any waves. It corresponds
to a register demand of 64 VGPRs in wave64.
- occupancy_factor: Depending on target_waves and wave_factor, this controls
the scheduling window sizes and number of moves.
The main differences from the previous heuristic is a lower wave minimum and
a slightly less aggressive reduction of waves.
It also increases SMEM_MAX_MOVES in order to mitigate some of the changes
from targeting less waves.
Totals from 62777 (78.63% of 79839) affected shaders: (Navi48)
MaxWaves: 1880983 -> 1848028 (-1.75%); split: +0.01%, -1.76%
Instrs: 40904711 -> 40800797 (-0.25%); split: -0.39%, +0.14%
CodeSize: 217132208 -> 216748832 (-0.18%); split: -0.29%, +0.12%
VGPRs: 3019304 -> 3099596 (+2.66%); split: -0.11%, +2.77%
Latency: 268857129 -> 265951122 (-1.08%); split: -1.33%, +0.25%
InvThroughput: 40960938 -> 41044533 (+0.20%); split: -0.18%, +0.39%
VClause: 794000 -> 782913 (-1.40%); split: -2.24%, +0.84%
SClause: 1192476 -> 1150831 (-3.49%); split: -3.94%, +0.45%
Copies: 2720470 -> 2700148 (-0.75%); split: -1.84%, +1.09%
Branches: 785926 -> 785951 (+0.00%); split: -0.01%, +0.01%
VALU: 22918411 -> 22890189 (-0.12%); split: -0.19%, +0.06%
SALU: 5281201 -> 5289486 (+0.16%); split: -0.21%, +0.36%
VOPD: 8790 -> 8685 (-1.19%); split: +1.08%, -2.28%
Totals from 62081 (77.77% of 79825) affected shaders: (Navi31)
MaxWaves: 1848555 -> 1812347 (-1.96%); split: +0.01%, -1.97%
Instrs: 39794460 -> 39704180 (-0.23%); split: -0.39%, +0.16%
CodeSize: 208987052 -> 208621524 (-0.17%); split: -0.31%, +0.13%
VGPRs: 3046284 -> 3135156 (+2.92%); split: -0.11%, +3.03%
Latency: 268863465 -> 265218186 (-1.36%); split: -1.59%, +0.23%
InvThroughput: 41101515 -> 41167075 (+0.16%); split: -0.22%, +0.38%
VClause: 795316 -> 774899 (-2.57%); split: -3.17%, +0.61%
SClause: 1177294 -> 1135451 (-3.55%); split: -4.06%, +0.51%
Copies: 2743254 -> 2725127 (-0.66%); split: -1.90%, +1.24%
Branches: 801395 -> 801428 (+0.00%); split: -0.01%, +0.02%
VALU: 23898938 -> 23871294 (-0.12%); split: -0.20%, +0.08%
SALU: 3908807 -> 3919130 (+0.26%); split: -0.23%, +0.50%
VOPD: 8529 -> 8500 (-0.34%); split: +1.29%, -1.63%
Totals from 44996 (71.01% of 63370) affected shaders: (Vega10)
MaxWaves: 307074 -> 304808 (-0.74%); split: +0.63%, -1.37%
Instrs: 22743534 -> 22716240 (-0.12%); split: -0.22%, +0.10%
CodeSize: 117284856 -> 117173212 (-0.10%); split: -0.19%, +0.09%
SGPRs: 3249008 -> 3330480 (+2.51%); split: -0.36%, +2.87%
VGPRs: 1901400 -> 1943880 (+2.23%); split: -0.60%, +2.83%
Latency: 224839126 -> 222878477 (-0.87%); split: -1.19%, +0.31%
InvThroughput: 114389570 -> 114316559 (-0.06%); split: -0.17%, +0.11%
VClause: 482012 -> 473304 (-1.81%); split: -2.86%, +1.05%
SClause: 757799 -> 717092 (-5.37%); split: -5.64%, +0.27%
Copies: 2182735 -> 2183598 (+0.04%); split: -1.17%, +1.21%
Branches: 396026 -> 395996 (-0.01%); split: -0.03%, +0.02%
VALU: 16740283 -> 16728098 (-0.07%); split: -0.14%, +0.07%
SALU: 2133575 -> 2145863 (+0.58%); split: -0.29%, +0.86%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30720>
2024-07-24 11:51:31 +02:00
|
|
|
int16_t occupancy_factor;
|
2019-09-17 13:22:17 +02:00
|
|
|
int16_t last_SMEM_stall;
|
|
|
|
|
int last_SMEM_dep_idx;
|
aco/scheduler: schedule VMEM store clauses during the regular forward pass
Totals from 1456 (1.82% of 79839) affected shaders: (Navi48)
MaxWaves: 37780 -> 37128 (-1.73%); split: +0.15%, -1.87%
Instrs: 3788175 -> 3788435 (+0.01%); split: -0.04%, +0.04%
CodeSize: 20468648 -> 20467432 (-0.01%); split: -0.04%, +0.03%
VGPRs: 86820 -> 91440 (+5.32%); split: -0.10%, +5.42%
Latency: 26866232 -> 26858867 (-0.03%); split: -0.04%, +0.01%
InvThroughput: 3491741 -> 3828339 (+9.64%); split: -0.02%, +9.66%
VClause: 90413 -> 89426 (-1.09%); split: -1.27%, +0.18%
SClause: 130532 -> 130530 (-0.00%); split: -0.00%, +0.00%
Copies: 347397 -> 347806 (+0.12%); split: -0.11%, +0.23%
Branches: 117476 -> 117496 (+0.02%)
VALU: 1897427 -> 1897830 (+0.02%); split: -0.02%, +0.04%
SALU: 602365 -> 602379 (+0.00%)
VOPD: 1259 -> 1251 (-0.64%); split: +0.24%, -0.87%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36599>
2025-08-05 15:37:10 +02:00
|
|
|
int last_VMEM_store_idx;
|
2019-11-06 16:38:57 +00:00
|
|
|
MoveState mv;
|
2021-07-05 15:26:18 +02:00
|
|
|
bool schedule_pos_exports = true;
|
|
|
|
|
unsigned schedule_pos_export_div = 1;
|
2019-09-17 13:22:17 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* This scheduler is a simple bottom-up pass based on ideas from
|
|
|
|
|
* "A Novel Lightweight Instruction Scheduling Algorithm for Just-In-Time Compiler"
|
|
|
|
|
* from Xiaohua Shi and Peng Guo.
|
|
|
|
|
* The basic approach is to iterate over all instructions. When a memory instruction
|
|
|
|
|
* is encountered it tries to move independent instructions from above and below
|
|
|
|
|
 * between the memory instruction and its first user.
|
|
|
|
|
* The novelty is that this scheduler cares for the current register pressure:
|
|
|
|
|
* Instructions will only be moved if the register pressure won't exceed a certain bound.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
 * Relocates `num` consecutive elements inside the sequence starting at begin_it.
 *
 * - idx < before: the elements [idx, idx + num) are rotated so that they end
 *   directly in front of position `before`.
 * - idx > before: the `num` elements ending at idx (inclusive) are rotated so
 *   that they start at position `before`.
 * - idx == before: the sequence is left untouched.
 *
 * The relative order of all other elements is preserved.
 */
template <typename T>
void
move_element(T begin_it, size_t idx, size_t before, int num = 1)
{
   /* Already in place. */
   if (idx == before)
      return;

   if (idx < before) {
      /* Moving towards the back: the moved elements lead the rotated range. */
      auto first = std::next(begin_it, idx);
      auto last = std::next(begin_it, before);
      std::rotate(first, first + num, last);
      return;
   }

   /* Moving towards the front: the moved elements trail the rotated range. */
   auto first = std::next(begin_it, before);
   auto last = std::next(begin_it, idx + 1);
   std::rotate(first, last - num, last);
}
|
|
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
void
|
2024-06-21 15:45:22 +02:00
|
|
|
DownwardsCursor::verify_invariants(const Block* block)
|
2019-11-06 16:38:57 +00:00
|
|
|
{
|
2021-05-07 12:10:52 +02:00
|
|
|
assert(source_idx < insert_idx_clause);
|
|
|
|
|
assert(insert_idx_clause < insert_idx);
|
|
|
|
|
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
|
RegisterDemand reference_demand;
|
|
|
|
|
for (int i = source_idx + 1; i < insert_idx_clause; ++i) {
|
2024-06-21 15:45:22 +02:00
|
|
|
reference_demand.update(block->instructions[i]->register_demand);
|
2021-05-07 12:10:52 +02:00
|
|
|
}
|
2021-05-10 11:54:21 +02:00
|
|
|
assert(total_demand == reference_demand);
|
2021-05-07 12:10:52 +02:00
|
|
|
#endif
|
2019-11-06 16:38:57 +00:00
|
|
|
}
|
|
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
/* Prepares the state for moving instructions from above the instruction at
 * current_idx to below it and returns the matching cursor.
 *
 * The dependency sets are cleared and re-seeded from the operands of
 * `current`. RAR_dependencies is only touched when improved_rar_ is set, and
 * RAR_dependencies_clause is only cleared when may_form_clauses is set as well.
 */
DownwardsCursor
MoveState::downwards_init(int current_idx, bool improved_rar_, bool may_form_clauses)
{
   improved_rar = improved_rar_;

   std::fill(depends_on.begin(), depends_on.end(), false);
   if (improved_rar)
      std::fill(RAR_dependencies.begin(), RAR_dependencies.end(), false);
   if (improved_rar && may_form_clauses)
      std::fill(RAR_dependencies_clause.begin(), RAR_dependencies_clause.end(), false);

   /* Every temporary read by the current instruction is a dependency. */
   for (const Operand& op : current->operands) {
      if (!op.isTemp())
         continue;

      const auto temp_id = op.tempId();
      depends_on[temp_id] = true;
      if (improved_rar && op.isFirstKill())
         RAR_dependencies[temp_id] = true;
   }

   DownwardsCursor cursor(current_idx);

   /* Demand right before insert_idx: the demand of the instruction above it
    * minus that instruction's temporary registers. */
   Instruction* const above_insert = block->instructions[cursor.insert_idx - 1].get();
   const RegisterDemand above_temp = get_temp_registers(above_insert);
   cursor.insert_demand = above_insert->register_demand - above_temp;

   cursor.verify_invariants(block);
   return cursor;
}
|
|
|
|
|
|
2025-07-31 09:49:26 +02:00
|
|
|
bool
|
|
|
|
|
check_dependencies(Instruction* instr, std::vector<bool>& def_dep, std::vector<bool>& op_dep)
|
|
|
|
|
{
|
|
|
|
|
for (const Definition& def : instr->definitions) {
|
|
|
|
|
if (def.isTemp() && def_dep[def.tempId()])
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
for (const Operand& op : instr->operands) {
|
|
|
|
|
if (op.isTemp() && op_dep[op.tempId()]) {
|
|
|
|
|
// FIXME: account for difference in register pressure
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-01 11:11:41 +02:00
|
|
|
/* The instruction at source_idx is moved below the instruction at insert_idx. */
|
2021-06-07 12:02:43 +02:00
|
|
|
MoveResult
|
2025-08-01 11:11:41 +02:00
|
|
|
MoveState::downwards_move(DownwardsCursor& cursor)
|
2019-11-06 16:38:57 +00:00
|
|
|
{
|
2025-07-31 09:49:26 +02:00
|
|
|
aco_ptr<Instruction>& candidate = block->instructions[cursor.source_idx];
|
2019-11-06 16:38:57 +00:00
|
|
|
|
|
|
|
|
/* check if one of candidate's operands is killed by depending instruction */
|
2025-08-01 11:11:41 +02:00
|
|
|
std::vector<bool>& RAR_deps = improved_rar ? RAR_dependencies : depends_on;
|
|
|
|
|
if (check_dependencies(candidate.get(), depends_on, RAR_deps))
|
|
|
|
|
return move_fail_ssa;
|
|
|
|
|
|
2025-08-01 11:38:06 +02:00
|
|
|
/* Check the new demand of the instructions being moved over:
|
|
|
|
|
* total_demand doesn't include the current clause which consists of exactly 1 instruction.
|
|
|
|
|
*/
|
2025-08-01 11:11:41 +02:00
|
|
|
RegisterDemand register_pressure = cursor.total_demand;
|
2025-08-01 11:38:06 +02:00
|
|
|
assert(cursor.insert_idx_clause == (cursor.insert_idx - 1));
|
|
|
|
|
register_pressure.update(block->instructions[cursor.insert_idx_clause]->register_demand);
|
2025-08-01 11:11:41 +02:00
|
|
|
const RegisterDemand candidate_diff = get_live_changes(candidate.get());
|
|
|
|
|
if (RegisterDemand(register_pressure - candidate_diff).exceeds(max_registers))
|
|
|
|
|
return move_fail_pressure;
|
|
|
|
|
|
|
|
|
|
/* New demand for the moved instruction */
|
|
|
|
|
const RegisterDemand temp = get_temp_registers(candidate.get());
|
|
|
|
|
const RegisterDemand insert_demand = cursor.insert_demand;
|
|
|
|
|
const RegisterDemand new_demand = insert_demand + temp;
|
|
|
|
|
if (new_demand.exceeds(max_registers))
|
|
|
|
|
return move_fail_pressure;
|
|
|
|
|
|
|
|
|
|
/* move the candidate below the memory load */
|
|
|
|
|
move_element(block->instructions.begin(), cursor.source_idx, cursor.insert_idx);
|
|
|
|
|
cursor.insert_idx--;
|
|
|
|
|
cursor.insert_idx_clause--;
|
|
|
|
|
|
|
|
|
|
/* update register pressure */
|
|
|
|
|
for (int i = cursor.source_idx; i < cursor.insert_idx; i++)
|
|
|
|
|
block->instructions[i]->register_demand -= candidate_diff;
|
|
|
|
|
block->instructions[cursor.insert_idx]->register_demand = new_demand;
|
|
|
|
|
if (cursor.source_idx != cursor.insert_idx_clause) {
|
|
|
|
|
/* Update demand if we moved over any instructions before the clause */
|
|
|
|
|
cursor.total_demand -= candidate_diff;
|
2025-07-31 09:49:26 +02:00
|
|
|
} else {
|
2025-08-01 11:11:41 +02:00
|
|
|
assert(cursor.total_demand == RegisterDemand{});
|
2019-11-06 16:38:57 +00:00
|
|
|
}
|
|
|
|
|
|
2025-08-01 11:11:41 +02:00
|
|
|
cursor.insert_demand -= candidate_diff;
|
|
|
|
|
|
|
|
|
|
cursor.source_idx--;
|
|
|
|
|
cursor.verify_invariants(block);
|
|
|
|
|
return move_success;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* The current clause is extended by moving the instruction at source_idx
|
|
|
|
|
* in front of the clause.
|
|
|
|
|
*/
|
|
|
|
|
MoveResult
|
|
|
|
|
MoveState::downwards_move_clause(DownwardsCursor& cursor)
|
|
|
|
|
{
|
|
|
|
|
assert(improved_rar);
|
2025-08-01 11:51:37 +02:00
|
|
|
if (cursor.source_idx == cursor.insert_idx_clause - 1) {
|
|
|
|
|
cursor.insert_idx_clause--;
|
|
|
|
|
cursor.source_idx--;
|
|
|
|
|
return move_success;
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-01 16:58:01 +02:00
|
|
|
int clause_begin_idx = cursor.source_idx; /* exclusive */
|
|
|
|
|
int clause_end_idx = cursor.source_idx; /* inclusive */
|
|
|
|
|
int insert_idx = cursor.insert_idx_clause - 1;
|
2025-08-01 11:11:41 +02:00
|
|
|
|
2025-08-01 16:58:01 +02:00
|
|
|
/* Check if one of candidates' operands is killed by depending instruction. */
|
|
|
|
|
Instruction* instr = block->instructions[cursor.insert_idx_clause].get();
|
|
|
|
|
RegisterDemand max_clause_demand;
|
|
|
|
|
while (should_form_clause(block->instructions[clause_begin_idx].get(), instr)) {
|
|
|
|
|
Instruction* candidate = block->instructions[clause_begin_idx--].get();
|
|
|
|
|
|
|
|
|
|
if (check_dependencies(candidate, depends_on, RAR_dependencies_clause))
|
2025-08-01 11:11:41 +02:00
|
|
|
return move_fail_ssa;
|
2019-11-06 16:38:57 +00:00
|
|
|
|
2025-08-01 16:58:01 +02:00
|
|
|
max_clause_demand.update(candidate->register_demand);
|
2021-05-10 11:54:21 +02:00
|
|
|
}
|
2025-08-01 16:58:01 +02:00
|
|
|
int clause_size = clause_end_idx - clause_begin_idx;
|
|
|
|
|
assert(clause_size > 0);
|
|
|
|
|
|
|
|
|
|
instr = block->instructions[clause_begin_idx].get();
|
|
|
|
|
RegisterDemand clause_begin_demand = instr->register_demand - get_temp_registers(instr);
|
|
|
|
|
instr = block->instructions[clause_end_idx].get();
|
|
|
|
|
RegisterDemand clause_end_demand = instr->register_demand - get_temp_registers(instr);
|
|
|
|
|
instr = block->instructions[insert_idx].get();
|
|
|
|
|
RegisterDemand insert_demand = instr->register_demand - get_temp_registers(instr);
|
|
|
|
|
|
|
|
|
|
/* RegisterDemand changes caused by the clause. */
|
|
|
|
|
RegisterDemand clause_diff = clause_end_demand - clause_begin_demand;
|
|
|
|
|
/* RegisterDemand changes caused by the instructions being moved over. */
|
|
|
|
|
RegisterDemand insert_diff = insert_demand - clause_end_demand;
|
|
|
|
|
|
|
|
|
|
/* Check the new demand of the instructions being moved over. */
|
|
|
|
|
if (RegisterDemand(cursor.total_demand - clause_diff).exceeds(max_registers))
|
2019-11-06 16:38:57 +00:00
|
|
|
return move_fail_pressure;
|
2021-05-10 11:54:21 +02:00
|
|
|
|
2025-08-01 16:58:01 +02:00
|
|
|
/* Check max demand for the moved clause instructions. */
|
|
|
|
|
if (RegisterDemand(max_clause_demand + insert_diff).exceeds(max_registers))
|
2019-11-06 16:38:57 +00:00
|
|
|
return move_fail_pressure;
|
|
|
|
|
|
2025-08-01 16:58:01 +02:00
|
|
|
/* Update register demand. */
|
|
|
|
|
for (int i = clause_begin_idx + 1; i <= clause_end_idx; i++)
|
|
|
|
|
block->instructions[i]->register_demand += insert_diff;
|
|
|
|
|
for (int i = clause_end_idx + 1; i <= insert_idx; i++)
|
|
|
|
|
block->instructions[i]->register_demand -= clause_diff;
|
2019-11-06 16:38:57 +00:00
|
|
|
|
2025-08-01 16:58:01 +02:00
|
|
|
/* Move the clause before the memory instruction. */
|
|
|
|
|
move_element(block->instructions.begin(), clause_begin_idx + 1, cursor.insert_idx_clause,
|
|
|
|
|
clause_size);
|
2025-08-01 11:51:37 +02:00
|
|
|
|
2025-08-01 16:58:01 +02:00
|
|
|
cursor.source_idx = clause_begin_idx;
|
|
|
|
|
cursor.insert_idx_clause -= clause_size;
|
|
|
|
|
cursor.total_demand -= clause_diff;
|
2019-11-06 16:38:57 +00:00
|
|
|
|
|
|
|
|
return move_success;
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
void
|
|
|
|
|
MoveState::downwards_skip(DownwardsCursor& cursor)
|
2019-11-06 16:38:57 +00:00
|
|
|
{
|
2021-06-07 12:02:43 +02:00
|
|
|
aco_ptr<Instruction>& instr = block->instructions[cursor.source_idx];
|
2019-11-06 16:38:57 +00:00
|
|
|
|
|
|
|
|
for (const Operand& op : instr->operands) {
|
|
|
|
|
if (op.isTemp()) {
|
|
|
|
|
depends_on[op.tempId()] = true;
|
|
|
|
|
if (improved_rar && op.isFirstKill()) {
|
|
|
|
|
RAR_dependencies[op.tempId()] = true;
|
|
|
|
|
RAR_dependencies_clause[op.tempId()] = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-06-21 15:45:22 +02:00
|
|
|
cursor.total_demand.update(instr->register_demand);
|
2021-06-07 12:02:43 +02:00
|
|
|
cursor.source_idx--;
|
2024-06-21 15:45:22 +02:00
|
|
|
cursor.verify_invariants(block);
|
2019-11-06 16:38:57 +00:00
|
|
|
}
|
|
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
void
|
2024-06-21 15:45:22 +02:00
|
|
|
UpwardsCursor::verify_invariants(const Block* block)
|
2021-06-07 12:02:43 +02:00
|
|
|
{
|
2021-05-07 12:10:52 +02:00
|
|
|
#ifndef NDEBUG
|
2021-06-07 12:02:43 +02:00
|
|
|
if (!has_insert_idx()) {
|
2021-05-07 12:10:52 +02:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(insert_idx < source_idx);
|
|
|
|
|
|
|
|
|
|
RegisterDemand reference_demand;
|
|
|
|
|
for (int i = insert_idx; i < source_idx; ++i) {
|
2024-06-21 15:45:22 +02:00
|
|
|
reference_demand.update(block->instructions[i]->register_demand);
|
2021-05-07 12:10:52 +02:00
|
|
|
}
|
|
|
|
|
assert(total_demand == reference_demand);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
/* Prepares the state for an upwards scheduling pass and returns a cursor starting at
 * source_idx.
 *
 * Both dependency sets are cleared, then every temporary defined by `current` is
 * marked in depends_on. improved_rar_ is stored and later selects how upwards_move()
 * evaluates read-after-read dependencies.
 */
UpwardsCursor
MoveState::upwards_init(int source_idx, bool improved_rar_)
{
   improved_rar = improved_rar_;

   /* Reset in place; the sizes of the sets stay as they are. */
   depends_on.assign(depends_on.size(), false);
   RAR_dependencies.assign(RAR_dependencies.size(), false);

   for (const Definition& def : current->definitions) {
      if (!def.isTemp())
         continue;
      depends_on[def.tempId()] = true;
   }

   return UpwardsCursor(source_idx);
}
|
|
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
bool
|
|
|
|
|
MoveState::upwards_check_deps(UpwardsCursor& cursor)
|
2019-11-06 16:38:57 +00:00
|
|
|
{
|
2021-06-07 12:02:43 +02:00
|
|
|
aco_ptr<Instruction>& instr = block->instructions[cursor.source_idx];
|
2019-11-06 16:38:57 +00:00
|
|
|
for (const Operand& op : instr->operands) {
|
|
|
|
|
if (op.isTemp() && depends_on[op.tempId()])
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
void
|
|
|
|
|
MoveState::upwards_update_insert_idx(UpwardsCursor& cursor)
|
2019-11-06 16:38:57 +00:00
|
|
|
{
|
2021-06-07 12:02:43 +02:00
|
|
|
cursor.insert_idx = cursor.source_idx;
|
2024-06-21 15:45:22 +02:00
|
|
|
cursor.total_demand = block->instructions[cursor.insert_idx]->register_demand;
|
2024-04-18 16:58:11 +02:00
|
|
|
const RegisterDemand temp = get_temp_registers(block->instructions[cursor.insert_idx - 1].get());
|
|
|
|
|
cursor.insert_demand = block->instructions[cursor.insert_idx - 1]->register_demand - temp;
|
2019-11-06 16:38:57 +00:00
|
|
|
}
|
|
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
/* Tries to move the instruction at cursor.source_idx up to cursor.insert_idx.
 *
 * Returns, in this order of priority:
 *  - move_fail_ssa      if any temporary operand is marked in depends_on,
 *  - move_fail_rar      if any temporary operand is marked in RAR_dependencies (with
 *                       improved_rar only first-kill operands are considered),
 *  - move_fail_pressure if the instructions being moved over or the moved instruction
 *                       would exceed max_registers,
 *  - move_success       otherwise; the instruction is moved, register demands are
 *                       updated and both cursor indices advance by one.
 * Requires that the cursor has an insert index.
 */
MoveResult
MoveState::upwards_move(UpwardsCursor& cursor)
{
   assert(cursor.has_insert_idx());

   /* Only used before move_element(): afterwards the slot holds another instruction. */
   Instruction* const candidate = block->instructions[cursor.source_idx].get();

   /* All SSA checks come first so that an SSA failure wins over a RAR failure. */
   for (const Operand& op : candidate->operands) {
      if (!op.isTemp())
         continue;
      if (depends_on[op.tempId()])
         return move_fail_ssa;
   }

   /* check if candidate uses/kills an operand which is used by a dependency */
   for (const Operand& op : candidate->operands) {
      if (!op.isTemp())
         continue;
      const bool relevant = !improved_rar || op.isFirstKill();
      if (relevant && RAR_dependencies[op.tempId()])
         return move_fail_rar;
   }

   /* check if register pressure is low enough: the diff is negative if register pressure is
    * decreased */
   const RegisterDemand live_delta = get_live_changes(candidate);
   const RegisterDemand temps = get_temp_registers(candidate);
   if (RegisterDemand(cursor.total_demand + live_delta).exceeds(max_registers))
      return move_fail_pressure;
   const RegisterDemand demand_at_target = cursor.insert_demand + live_delta + temps;
   if (demand_at_target.exceeds(max_registers))
      return move_fail_pressure;

   /* move the candidate above the insert_idx */
   move_element(block->instructions.begin(), cursor.source_idx, cursor.insert_idx);

   /* update register pressure */
   block->instructions[cursor.insert_idx]->register_demand = demand_at_target;
   for (int idx = cursor.insert_idx + 1; idx <= cursor.source_idx; ++idx)
      block->instructions[idx]->register_demand += live_delta;
   cursor.total_demand += live_delta;
   cursor.insert_demand += live_delta;

   ++cursor.insert_idx;
   ++cursor.source_idx;

   cursor.verify_invariants(block);

   return move_success;
}
|
|
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
void
|
|
|
|
|
MoveState::upwards_skip(UpwardsCursor& cursor)
|
2019-11-06 16:38:57 +00:00
|
|
|
{
|
2021-06-07 12:02:43 +02:00
|
|
|
if (cursor.has_insert_idx()) {
|
|
|
|
|
aco_ptr<Instruction>& instr = block->instructions[cursor.source_idx];
|
2019-11-06 16:38:57 +00:00
|
|
|
for (const Definition& def : instr->definitions) {
|
|
|
|
|
if (def.isTemp())
|
|
|
|
|
depends_on[def.tempId()] = true;
|
|
|
|
|
}
|
|
|
|
|
for (const Operand& op : instr->operands) {
|
|
|
|
|
if (op.isTemp())
|
|
|
|
|
RAR_dependencies[op.tempId()] = true;
|
|
|
|
|
}
|
2024-06-21 15:45:22 +02:00
|
|
|
cursor.total_demand.update(instr->register_demand);
|
2019-11-06 16:38:57 +00:00
|
|
|
}
|
|
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
cursor.source_idx++;
|
2021-05-07 12:10:52 +02:00
|
|
|
|
2024-06-21 15:45:22 +02:00
|
|
|
cursor.verify_invariants(block);
|
2019-11-06 16:38:57 +00:00
|
|
|
}
|
|
|
|
|
|
2020-06-26 15:54:22 +01:00
|
|
|
/* Returns get_sync_info(instr), adjusted for SMEM instructions whose first operand is
 * 16 bytes large: those additionally get storage_buffer and semantic_private, and lose
 * semantic_can_reorder. All other instructions are returned unchanged.
 */
memory_sync_info
get_sync_info_with_hack(const Instruction* instr)
{
   memory_sync_info sync = get_sync_info(instr);

   const bool smem_with_16_byte_operand =
      instr->isSMEM() && !instr->operands.empty() && instr->operands[0].bytes() == 16;
   if (!smem_with_16_byte_operand)
      return sync;

   // FIXME: currently, it doesn't seem beneficial to omit this due to how our scheduler works
   sync.storage = static_cast<storage_class>(sync.storage | storage_buffer);
   sync.semantics =
      static_cast<memory_semantics>((sync.semantics | semantic_private) & ~semantic_can_reorder);
   return sync;
}
|
|
|
|
|
|
2025-08-20 22:47:46 +02:00
|
|
|
bool
|
|
|
|
|
is_reorderable(const Instruction* instr)
|
|
|
|
|
{
|
|
|
|
|
return instr->opcode != aco_opcode::s_memtime && instr->opcode != aco_opcode::s_memrealtime &&
|
|
|
|
|
instr->opcode != aco_opcode::s_setprio && instr->opcode != aco_opcode::s_getreg_b32 &&
|
|
|
|
|
instr->opcode != aco_opcode::p_shader_cycles_hi_lo_hi &&
|
|
|
|
|
instr->opcode != aco_opcode::p_init_scratch &&
|
|
|
|
|
instr->opcode != aco_opcode::p_jump_to_epilog &&
|
|
|
|
|
instr->opcode != aco_opcode::s_sendmsg_rtn_b32 &&
|
|
|
|
|
instr->opcode != aco_opcode::s_sendmsg_rtn_b64 &&
|
|
|
|
|
instr->opcode != aco_opcode::p_end_with_regs && instr->opcode != aco_opcode::s_nop &&
|
|
|
|
|
instr->opcode != aco_opcode::s_sleep && instr->opcode != aco_opcode::s_trap &&
|
|
|
|
|
instr->opcode != aco_opcode::p_call && instr->opcode != aco_opcode::p_logical_start &&
|
|
|
|
|
instr->opcode != aco_opcode::p_logical_end;
|
|
|
|
|
}
|
|
|
|
|
|
2020-06-26 15:54:22 +01:00
|
|
|
/* Summary of the memory-ordering relevant events of one or more instructions, as
 * collected by add_memory_event(). Apart from has_control_barrier, every member is a
 * bitmask of storage classes.
 */
struct memory_event_set {
   /* An instruction without storage class for which is_atomic_or_control_instr()
    * returned non-zero was seen. */
   bool has_control_barrier;

   /* Storage classes of p_barrier instructions with acquire semantics. */
   unsigned bar_acquire;
   /* Storage classes of p_barrier instructions with release semantics. */
   unsigned bar_release;
   /* Storage classes of all p_barrier instructions. */
   unsigned bar_classes;

   /* Storage classes of instructions whose sync info has acquire semantics. */
   unsigned access_acquire;
   /* Storage classes of instructions whose sync info has release semantics. */
   unsigned access_release;
   /* Storage classes of non-private accesses without semantic_atomic. */
   unsigned access_relaxed;
   /* Storage classes of non-private accesses with semantic_atomic. */
   unsigned access_atomic;
};
|
2019-11-07 14:48:51 +00:00
|
|
|
|
|
|
|
|
struct hazard_query {
|
2025-09-09 10:03:59 +01:00
|
|
|
Program* program;
|
2022-07-21 15:45:11 +01:00
|
|
|
amd_gfx_level gfx_level;
|
2019-11-07 14:48:51 +00:00
|
|
|
bool contains_spill;
|
2020-06-26 15:54:22 +01:00
|
|
|
bool contains_sendmsg;
|
aco: allow to schedule SALU/SMEM through exec changes
Totals from 16794 (12.05% of 139391) affected shaders (NAVI10):
SGPRs: 757760 -> 762048 (+0.57%); split: -0.39%, +0.95%
VGPRs: 402844 -> 402744 (-0.02%); split: -0.04%, +0.02%
CodeSize: 22290900 -> 22285068 (-0.03%); split: -0.06%, +0.04%
MaxWaves: 294163 -> 294222 (+0.02%); split: +0.03%, -0.01%
Instrs: 4190074 -> 4188513 (-0.04%); split: -0.08%, +0.04%
Cycles: 40685028 -> 40678640 (-0.02%); split: -0.03%, +0.02%
VMEM: 7711867 -> 7704315 (-0.10%); split: +0.28%, -0.38%
SMEM: 942472 -> 1007052 (+6.85%); split: +7.15%, -0.30%
VClause: 92990 -> 92974 (-0.02%); split: -0.03%, +0.01%
SClause: 263700 -> 263810 (+0.04%); split: -0.38%, +0.42%
Copies: 277467 -> 276988 (-0.17%); split: -0.37%, +0.20%
Branches: 45899 -> 45896 (-0.01%)
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7903>
2020-08-12 16:16:46 +02:00
|
|
|
bool uses_exec;
|
2022-02-11 19:19:45 +00:00
|
|
|
bool writes_exec;
|
2020-06-26 15:54:22 +01:00
|
|
|
memory_event_set mem_events;
|
|
|
|
|
unsigned aliasing_storage; /* storage classes which are accessed (non-SMEM) */
|
|
|
|
|
unsigned aliasing_storage_smem; /* storage classes which are accessed (SMEM) */
|
2019-11-07 14:48:51 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
void
|
2022-07-21 15:45:11 +01:00
|
|
|
init_hazard_query(const sched_ctx& ctx, hazard_query* query)
|
2019-11-07 14:48:51 +00:00
|
|
|
{
|
2025-09-09 10:03:59 +01:00
|
|
|
query->program = ctx.program;
|
2022-07-21 15:45:11 +01:00
|
|
|
query->gfx_level = ctx.gfx_level;
|
2019-11-07 14:48:51 +00:00
|
|
|
query->contains_spill = false;
|
2020-06-26 15:54:22 +01:00
|
|
|
query->contains_sendmsg = false;
|
aco: allow to schedule SALU/SMEM through exec changes
Totals from 16794 (12.05% of 139391) affected shaders (NAVI10):
SGPRs: 757760 -> 762048 (+0.57%); split: -0.39%, +0.95%
VGPRs: 402844 -> 402744 (-0.02%); split: -0.04%, +0.02%
CodeSize: 22290900 -> 22285068 (-0.03%); split: -0.06%, +0.04%
MaxWaves: 294163 -> 294222 (+0.02%); split: +0.03%, -0.01%
Instrs: 4190074 -> 4188513 (-0.04%); split: -0.08%, +0.04%
Cycles: 40685028 -> 40678640 (-0.02%); split: -0.03%, +0.02%
VMEM: 7711867 -> 7704315 (-0.10%); split: +0.28%, -0.38%
SMEM: 942472 -> 1007052 (+6.85%); split: +7.15%, -0.30%
VClause: 92990 -> 92974 (-0.02%); split: -0.03%, +0.01%
SClause: 263700 -> 263810 (+0.04%); split: -0.38%, +0.42%
Copies: 277467 -> 276988 (-0.17%); split: -0.37%, +0.20%
Branches: 45899 -> 45896 (-0.01%)
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7903>
2020-08-12 16:16:46 +02:00
|
|
|
query->uses_exec = false;
|
2022-02-11 19:19:45 +00:00
|
|
|
query->writes_exec = false;
|
2020-06-26 15:54:22 +01:00
|
|
|
memset(&query->mem_events, 0, sizeof(query->mem_events));
|
|
|
|
|
query->aliasing_storage = 0;
|
|
|
|
|
query->aliasing_storage_smem = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
2025-09-09 10:03:59 +01:00
|
|
|
add_memory_event(Program* program, memory_event_set* set, Instruction* instr,
|
2022-07-21 15:45:11 +01:00
|
|
|
memory_sync_info* sync)
|
2020-06-26 15:54:22 +01:00
|
|
|
{
|
|
|
|
|
if (instr->opcode == aco_opcode::p_barrier) {
|
2021-01-21 16:13:34 +00:00
|
|
|
Pseudo_barrier_instruction& bar = instr->barrier();
|
|
|
|
|
if (bar.sync.semantics & semantic_acquire)
|
|
|
|
|
set->bar_acquire |= bar.sync.storage;
|
|
|
|
|
if (bar.sync.semantics & semantic_release)
|
|
|
|
|
set->bar_release |= bar.sync.storage;
|
|
|
|
|
set->bar_classes |= bar.sync.storage;
|
2020-06-26 15:54:22 +01:00
|
|
|
}
|
|
|
|
|
|
2025-07-22 15:11:29 +01:00
|
|
|
if (!sync->storage) {
|
|
|
|
|
set->has_control_barrier |=
|
2025-09-09 10:03:59 +01:00
|
|
|
is_atomic_or_control_instr(program, instr, *sync, semantic_acquire | semantic_release) !=
|
2025-07-22 15:11:29 +01:00
|
|
|
0;
|
2020-06-26 15:54:22 +01:00
|
|
|
return;
|
2025-07-22 15:11:29 +01:00
|
|
|
}
|
2020-06-26 15:54:22 +01:00
|
|
|
|
|
|
|
|
if (sync->semantics & semantic_acquire)
|
|
|
|
|
set->access_acquire |= sync->storage;
|
|
|
|
|
if (sync->semantics & semantic_release)
|
|
|
|
|
set->access_release |= sync->storage;
|
|
|
|
|
|
|
|
|
|
if (!(sync->semantics & semantic_private)) {
|
|
|
|
|
if (sync->semantics & semantic_atomic)
|
|
|
|
|
set->access_atomic |= sync->storage;
|
|
|
|
|
else
|
|
|
|
|
set->access_relaxed |= sync->storage;
|
|
|
|
|
}
|
2019-11-07 14:48:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
add_to_hazard_query(hazard_query* query, Instruction* instr)
|
2019-09-17 13:22:17 +02:00
|
|
|
{
|
2019-11-07 14:48:51 +00:00
|
|
|
if (instr->opcode == aco_opcode::p_spill || instr->opcode == aco_opcode::p_reload)
|
|
|
|
|
query->contains_spill = true;
|
2020-06-26 15:54:22 +01:00
|
|
|
query->contains_sendmsg |= instr->opcode == aco_opcode::s_sendmsg;
|
aco: allow to schedule SALU/SMEM through exec changes
Totals from 16794 (12.05% of 139391) affected shaders (NAVI10):
SGPRs: 757760 -> 762048 (+0.57%); split: -0.39%, +0.95%
VGPRs: 402844 -> 402744 (-0.02%); split: -0.04%, +0.02%
CodeSize: 22290900 -> 22285068 (-0.03%); split: -0.06%, +0.04%
MaxWaves: 294163 -> 294222 (+0.02%); split: +0.03%, -0.01%
Instrs: 4190074 -> 4188513 (-0.04%); split: -0.08%, +0.04%
Cycles: 40685028 -> 40678640 (-0.02%); split: -0.03%, +0.02%
VMEM: 7711867 -> 7704315 (-0.10%); split: +0.28%, -0.38%
SMEM: 942472 -> 1007052 (+6.85%); split: +7.15%, -0.30%
VClause: 92990 -> 92974 (-0.02%); split: -0.03%, +0.01%
SClause: 263700 -> 263810 (+0.04%); split: -0.38%, +0.42%
Copies: 277467 -> 276988 (-0.17%); split: -0.37%, +0.20%
Branches: 45899 -> 45896 (-0.01%)
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7903>
2020-08-12 16:16:46 +02:00
|
|
|
query->uses_exec |= needs_exec_mask(instr);
|
2022-02-11 19:19:45 +00:00
|
|
|
for (const Definition& def : instr->definitions) {
|
|
|
|
|
if (def.isFixed() && def.physReg() == exec)
|
|
|
|
|
query->writes_exec = true;
|
|
|
|
|
}
|
2020-06-26 15:54:22 +01:00
|
|
|
|
|
|
|
|
memory_sync_info sync = get_sync_info_with_hack(instr);
|
2019-11-07 14:48:51 +00:00
|
|
|
|
2025-09-09 10:03:59 +01:00
|
|
|
add_memory_event(query->program, &query->mem_events, instr, &sync);
|
2020-06-26 15:54:22 +01:00
|
|
|
|
|
|
|
|
if (!(sync.semantics & semantic_can_reorder)) {
|
|
|
|
|
unsigned storage = sync.storage;
|
|
|
|
|
/* images and buffer/global memory can alias */ // TODO: more precisely, buffer images and
|
|
|
|
|
// buffer/global memory can alias
|
|
|
|
|
if (storage & (storage_buffer | storage_image))
|
|
|
|
|
storage |= storage_buffer | storage_image;
|
2021-01-20 15:27:16 +00:00
|
|
|
if (instr->isSMEM())
|
2020-06-26 15:54:22 +01:00
|
|
|
query->aliasing_storage_smem |= storage;
|
|
|
|
|
else
|
|
|
|
|
query->aliasing_storage |= storage;
|
|
|
|
|
}
|
2019-11-07 14:48:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Result of perform_hazard_query(). */
enum HazardResult {
   /* No hazard was found. */
   hazard_success,
   /* Overlap with a potentially aliasing access outside of storage_shared. */
   hazard_fail_reorder_vmem_smem,
   /* Overlap with a potentially aliasing storage_shared access. */
   hazard_fail_reorder_ds,
   /* s_sendmsg while the query already contains an s_sendmsg. */
   hazard_fail_reorder_sendmsg,
   /* p_spill/p_reload while the query already contains a spill or reload. */
   hazard_fail_spill,
   /* The instruction is an export. */
   hazard_fail_export,
   /* A barrier or acquire/release ordering would be violated. */
   hazard_fail_barrier,
   /* Must stop at these failures. The hazard query code doesn't consider them
    * when added. */
   hazard_fail_exec,
   hazard_fail_unreorderable,
};
|
|
|
|
|
|
2020-06-26 15:54:22 +01:00
|
|
|
/* Checks whether `instr` may be reordered with respect to the instructions that were
 * added to `query`.
 *
 * `upwards` selects the direction of the move. It decides which direction-specific
 * opcodes are rejected and which side (the instruction or the query) counts as the
 * earlier one for the memory-ordering checks.
 *
 * Returns hazard_success if no hazard was found, otherwise the first matching failure
 * in the order of the checks below. Neither `query` nor `instr` is modified.
 */
HazardResult
perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
{
   const aco_opcode opcode = instr->opcode;

   /* Direction-specific instructions which must stay where they are:
    * - don't schedule discards downwards
    * - In Primitive Ordered Pixel Shading, await overlapped waves as late as possible, and
    *   notify overlapping waves that they can continue execution as early as possible.
    */
   if (upwards) {
      if (opcode == aco_opcode::p_pops_gfx9_add_exiting_wave_id ||
          is_wait_export_ready(query->gfx_level, instr))
         return hazard_fail_unreorderable;
   } else if (opcode == aco_opcode::p_exit_early_if_not ||
              opcode == aco_opcode::p_pops_gfx9_ordered_section_done) {
      return hazard_fail_unreorderable;
   }

   /* An exec write can't be moved across anything that reads or writes exec ... */
   if (query->uses_exec || query->writes_exec) {
      for (const Definition& def : instr->definitions) {
         if (def.isFixed() && def.physReg() == exec)
            return hazard_fail_exec;
      }
   }
   /* ... and an exec user can't be moved across an exec write. */
   if (query->writes_exec && needs_exec_mask(instr))
      return hazard_fail_exec;

   /* Don't move exports so that they stay closer together.
    * Since GFX11, export order matters. MRTZ must come first,
    * then color exports sorted from first to last.
    * Also, with Primitive Ordered Pixel Shading on GFX11+, the `done` export must not be moved
    * above the memory accesses before the queue family scope (more precisely, fragment interlock
    * scope, but it's not available in ACO) release barrier that is expected to be inserted before
    * the export, as well as before any `s_wait_event export_ready` which enters the ordered
    * section, because the `done` export exits the ordered section.
    */
   if (instr->isEXP() || opcode == aco_opcode::p_dual_src_export_gfx11)
      return hazard_fail_export;

   memory_event_set instr_set;
   memset(&instr_set, 0, sizeof(instr_set));
   memory_sync_info sync = get_sync_info_with_hack(instr);
   add_memory_event(query->program, &instr_set, instr, &sync);

   /* `first` is the side treated as the earlier one: the instruction itself when
    * moving downwards, the query's instructions when moving upwards.
    */
   const memory_event_set& first = upwards ? query->mem_events : instr_set;
   const memory_event_set& second = upwards ? instr_set : query->mem_events;

   const unsigned first_acquire = first.access_acquire | first.bar_acquire;
   const unsigned first_accesses = first.access_relaxed | first.access_atomic;
   const unsigned second_release = second.bar_release | second.access_release;
   const unsigned second_accesses = second.access_relaxed | second.access_atomic;

   /* everything after barrier(acquire) happens after the atomics/control_barriers before
    * everything after load(acquire) happens after the load
    */
   const bool acquire_hazard =
      ((first.has_control_barrier || first.access_atomic) && second.bar_acquire) ||
      (first_acquire && second.bar_classes) || (first_acquire & second_accesses) != 0;

   /* everything before barrier(release) happens before the atomics/control_barriers after
    * everything before store(release) happens before the store
    */
   const bool release_hazard =
      (first.bar_release && (second.has_control_barrier || second.access_atomic)) ||
      (first.bar_classes && second_release) || (first_accesses & second_release) != 0;

   /* don't move memory barriers around other memory barriers */
   const bool barrier_pair = first.bar_classes && second.bar_classes;

   /* Don't move memory accesses to before control barriers. I don't think
    * this is necessary for the Vulkan memory model, but it might be for GLSL450. */
   const unsigned control_classes =
      storage_buffer | storage_image | storage_shared | storage_task_payload;
   const bool control_hazard =
      first.has_control_barrier && (second_accesses & control_classes) != 0;

   if (acquire_hazard || release_hazard || barrier_pair || control_hazard)
      return hazard_fail_barrier;

   /* don't move memory loads/stores past potentially aliasing loads/stores */
   const unsigned aliasing_storage =
      instr->isSMEM() ? query->aliasing_storage_smem : query->aliasing_storage;
   const unsigned overlap = sync.storage & aliasing_storage;
   if (overlap && !(sync.semantics & semantic_can_reorder))
      return (overlap & storage_shared) ? hazard_fail_reorder_ds : hazard_fail_reorder_vmem_smem;

   const bool is_spill_or_reload =
      opcode == aco_opcode::p_spill || opcode == aco_opcode::p_reload;
   if (is_spill_or_reload && query->contains_spill)
      return hazard_fail_spill;

   if (opcode == aco_opcode::s_sendmsg && query->contains_sendmsg)
      return hazard_fail_reorder_sendmsg;

   return hazard_success;
}
|
|
|
|
|
|
2024-04-26 11:52:13 +01:00
|
|
|
unsigned
|
|
|
|
|
get_likely_cost(Instruction* instr)
|
|
|
|
|
{
|
|
|
|
|
if (instr->opcode == aco_opcode::p_split_vector ||
|
|
|
|
|
instr->opcode == aco_opcode::p_extract_vector) {
|
|
|
|
|
unsigned cost = 0;
|
|
|
|
|
for (Definition def : instr->definitions) {
|
|
|
|
|
if (instr->operands[0].isKill() &&
|
|
|
|
|
def.regClass().type() == instr->operands[0].regClass().type())
|
|
|
|
|
continue;
|
|
|
|
|
cost += def.size();
|
|
|
|
|
}
|
|
|
|
|
return cost;
|
|
|
|
|
} else if (instr->opcode == aco_opcode::p_create_vector) {
|
|
|
|
|
unsigned cost = 0;
|
|
|
|
|
for (Operand op : instr->operands) {
|
|
|
|
|
if (op.isTemp() && op.isFirstKill() &&
|
|
|
|
|
op.regClass().type() == instr->definitions[0].regClass().type())
|
|
|
|
|
continue;
|
|
|
|
|
cost += op.size();
|
|
|
|
|
}
|
|
|
|
|
return cost;
|
|
|
|
|
} else {
|
|
|
|
|
/* For the moment, just assume the same cost for all other instructions. */
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
void
|
2024-06-13 11:42:11 +02:00
|
|
|
schedule_SMEM(sched_ctx& ctx, Block* block, Instruction* current, int idx)
|
2019-09-17 13:22:17 +02:00
|
|
|
{
|
|
|
|
|
assert(idx != 0);
|
|
|
|
|
int window_size = SMEM_WINDOW_SIZE;
|
|
|
|
|
int max_moves = SMEM_MAX_MOVES;
|
|
|
|
|
int16_t k = 0;
|
|
|
|
|
|
2019-10-10 17:04:06 +01:00
|
|
|
/* don't move s_memtime/s_memrealtime */
|
2022-10-24 02:14:24 +00:00
|
|
|
if (current->opcode == aco_opcode::s_memtime || current->opcode == aco_opcode::s_memrealtime ||
|
|
|
|
|
current->opcode == aco_opcode::s_sendmsg_rtn_b32 ||
|
|
|
|
|
current->opcode == aco_opcode::s_sendmsg_rtn_b64)
|
2019-10-10 17:04:06 +01:00
|
|
|
return;
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
/* first, check if we have instructions before current to move down */
|
2019-11-07 14:48:51 +00:00
|
|
|
hazard_query hq;
|
2022-07-21 15:45:11 +01:00
|
|
|
init_hazard_query(ctx, &hq);
|
2019-11-07 14:48:51 +00:00
|
|
|
add_to_hazard_query(&hq, current);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
DownwardsCursor cursor = ctx.mv.downwards_init(idx, false, false);
|
2019-11-06 16:38:57 +00:00
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int)idx - window_size;
|
|
|
|
|
candidate_idx--) {
|
|
|
|
|
assert(candidate_idx >= 0);
|
2021-06-07 12:02:43 +02:00
|
|
|
assert(candidate_idx == cursor.source_idx);
|
2019-09-17 13:22:17 +02:00
|
|
|
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
|
|
|
|
|
|
|
|
|
/* break if we'd make the previous SMEM instruction stall */
|
|
|
|
|
bool can_stall_prev_smem =
|
|
|
|
|
idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx;
|
|
|
|
|
if (can_stall_prev_smem && ctx.last_SMEM_stall >= 0)
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
/* break when encountering another MEM instruction, logical_start or barriers */
|
2025-08-20 22:47:46 +02:00
|
|
|
if (!is_reorderable(candidate.get()))
|
2019-09-17 13:22:17 +02:00
|
|
|
break;
|
aco: move VMEM instructions below descriptor loads
This is to prevent sequences like:
a = descriptor_load()
vmem(a)
b = descriptor_load()
vmem(b)
and instead create:
a = descriptor_load()
b = descriptor_load()
vmem(a)
vmem(b)
fossil-db (GFX10.3):
Totals from 114521 (78.30% of 146267) affected shaders:
VGPRs: 4540352 -> 4540216 (-0.00%); split: -0.03%, +0.02%
CodeSize: 289864228 -> 289114652 (-0.26%); split: -0.29%, +0.03%
MaxWaves: 2940234 -> 2940338 (+0.00%); split: +0.00%, -0.00%
Instrs: 55112418 -> 54919910 (-0.35%); split: -0.38%, +0.03%
Latency: 956528393 -> 954682011 (-0.19%); split: -0.24%, +0.05%
InvThroughput: 229280830 -> 229238107 (-0.02%); split: -0.04%, +0.02%
VClause: 1141832 -> 1139002 (-0.25%); split: -0.63%, +0.38%
SClause: 2357840 -> 2225008 (-5.63%); split: -6.01%, +0.38%
Copies: 3316040 -> 3331519 (+0.47%); split: -0.31%, +0.77%
Branches: 1187212 -> 1186919 (-0.02%); split: -0.03%, +0.01%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6489>
2020-11-27 16:37:07 +00:00
|
|
|
/* only move VMEM instructions below descriptor loads. be more aggressive at higher num_waves
|
|
|
|
|
* to help create more vmem clauses */
|
2022-05-19 18:21:34 +01:00
|
|
|
if ((candidate->isVMEM() || candidate->isFlatLike()) &&
|
aco/scheduler: improve scheduling heuristic
The heuristic we are currently using still stems from the GCN era
with the only adjustments being made for RDNA was to double (or triple)
the wave count.
This rewrite aims to detangle some concepts and provide more consistent results.
- wave_factor: The purpose of this value is to reflect that RDNA SIMDs can
accomodate twice as many waves as GCN SIMDs.
- reg_file_multiple: This value accounts for the larger register file of wave32
and some RDNA3 families.
- wave_minimum: Below this value, we don't sacrifice any waves. It corresponds
to a register demand of 64 VGPRs in wave64.
- occupancy_factor: Depending on target_waves and wave_factor, this controls
the scheduling window sizes and number of moves.
The main differences from the previous heuristic is a lower wave minimum and
a slightly less aggressive reduction of waves.
It also increases SMEM_MAX_MOVES in order to mitigate some of the changes
from targeting less waves.
Totals from 62777 (78.63% of 79839) affected shaders: (Navi48)
MaxWaves: 1880983 -> 1848028 (-1.75%); split: +0.01%, -1.76%
Instrs: 40904711 -> 40800797 (-0.25%); split: -0.39%, +0.14%
CodeSize: 217132208 -> 216748832 (-0.18%); split: -0.29%, +0.12%
VGPRs: 3019304 -> 3099596 (+2.66%); split: -0.11%, +2.77%
Latency: 268857129 -> 265951122 (-1.08%); split: -1.33%, +0.25%
InvThroughput: 40960938 -> 41044533 (+0.20%); split: -0.18%, +0.39%
VClause: 794000 -> 782913 (-1.40%); split: -2.24%, +0.84%
SClause: 1192476 -> 1150831 (-3.49%); split: -3.94%, +0.45%
Copies: 2720470 -> 2700148 (-0.75%); split: -1.84%, +1.09%
Branches: 785926 -> 785951 (+0.00%); split: -0.01%, +0.01%
VALU: 22918411 -> 22890189 (-0.12%); split: -0.19%, +0.06%
SALU: 5281201 -> 5289486 (+0.16%); split: -0.21%, +0.36%
VOPD: 8790 -> 8685 (-1.19%); split: +1.08%, -2.28%
Totals from 62081 (77.77% of 79825) affected shaders: (Navi31)
MaxWaves: 1848555 -> 1812347 (-1.96%); split: +0.01%, -1.97%
Instrs: 39794460 -> 39704180 (-0.23%); split: -0.39%, +0.16%
CodeSize: 208987052 -> 208621524 (-0.17%); split: -0.31%, +0.13%
VGPRs: 3046284 -> 3135156 (+2.92%); split: -0.11%, +3.03%
Latency: 268863465 -> 265218186 (-1.36%); split: -1.59%, +0.23%
InvThroughput: 41101515 -> 41167075 (+0.16%); split: -0.22%, +0.38%
VClause: 795316 -> 774899 (-2.57%); split: -3.17%, +0.61%
SClause: 1177294 -> 1135451 (-3.55%); split: -4.06%, +0.51%
Copies: 2743254 -> 2725127 (-0.66%); split: -1.90%, +1.24%
Branches: 801395 -> 801428 (+0.00%); split: -0.01%, +0.02%
VALU: 23898938 -> 23871294 (-0.12%); split: -0.20%, +0.08%
SALU: 3908807 -> 3919130 (+0.26%); split: -0.23%, +0.50%
VOPD: 8529 -> 8500 (-0.34%); split: +1.29%, -1.63%
Totals from 44996 (71.01% of 63370) affected shaders: (Vega10)
MaxWaves: 307074 -> 304808 (-0.74%); split: +0.63%, -1.37%
Instrs: 22743534 -> 22716240 (-0.12%); split: -0.22%, +0.10%
CodeSize: 117284856 -> 117173212 (-0.10%); split: -0.19%, +0.09%
SGPRs: 3249008 -> 3330480 (+2.51%); split: -0.36%, +2.87%
VGPRs: 1901400 -> 1943880 (+2.23%); split: -0.60%, +2.83%
Latency: 224839126 -> 222878477 (-0.87%); split: -1.19%, +0.31%
InvThroughput: 114389570 -> 114316559 (-0.06%); split: -0.17%, +0.11%
VClause: 482012 -> 473304 (-1.81%); split: -2.86%, +1.05%
SClause: 757799 -> 717092 (-5.37%); split: -5.64%, +0.27%
Copies: 2182735 -> 2183598 (+0.04%); split: -1.17%, +1.21%
Branches: 396026 -> 395996 (-0.01%); split: -0.03%, +0.02%
VALU: 16740283 -> 16728098 (-0.07%); split: -0.14%, +0.07%
SALU: 2133575 -> 2145863 (+0.58%); split: -0.29%, +0.86%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30720>
2024-07-24 11:51:31 +02:00
|
|
|
(cursor.insert_idx - cursor.source_idx > (ctx.occupancy_factor * 4) ||
|
2022-05-19 18:21:34 +01:00
|
|
|
current->operands[0].size() == 4))
|
2019-10-31 17:33:35 +01:00
|
|
|
break;
|
2020-11-27 17:26:46 +00:00
|
|
|
/* don't move descriptor loads below buffer loads */
|
2023-02-03 17:27:52 +00:00
|
|
|
if (candidate->isSMEM() && !candidate->operands.empty() && current->operands[0].size() == 4 &&
|
2020-11-27 17:26:46 +00:00
|
|
|
candidate->operands[0].size() == 2)
|
|
|
|
|
break;
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2019-11-06 16:38:57 +00:00
|
|
|
bool can_move_down = true;
|
2019-11-07 14:48:51 +00:00
|
|
|
|
2020-06-26 15:54:22 +01:00
|
|
|
HazardResult haz = perform_hazard_query(&hq, candidate.get(), false);
|
2020-02-11 16:55:39 +00:00
|
|
|
if (haz == hazard_fail_reorder_ds || haz == hazard_fail_spill ||
|
|
|
|
|
haz == hazard_fail_reorder_sendmsg || haz == hazard_fail_barrier ||
|
|
|
|
|
haz == hazard_fail_export)
|
2019-09-17 13:22:17 +02:00
|
|
|
can_move_down = false;
|
2020-02-11 14:15:32 +00:00
|
|
|
else if (haz != hazard_success)
|
2019-11-07 14:48:51 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
/* don't use LDS/GDS instructions to hide latency since it can
|
2023-04-12 07:10:19 +03:00
|
|
|
* significantly worsen LDS scheduling */
|
2021-01-20 15:27:16 +00:00
|
|
|
if (candidate->isDS() || !can_move_down) {
|
2019-11-07 14:48:51 +00:00
|
|
|
add_to_hazard_query(&hq, candidate.get());
|
2021-06-07 12:02:43 +02:00
|
|
|
ctx.mv.downwards_skip(cursor);
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-01 11:11:41 +02:00
|
|
|
MoveResult res = ctx.mv.downwards_move(cursor);
|
2019-11-06 16:38:57 +00:00
|
|
|
if (res == move_fail_ssa || res == move_fail_rar) {
|
2019-11-07 14:48:51 +00:00
|
|
|
add_to_hazard_query(&hq, candidate.get());
|
2021-06-07 12:02:43 +02:00
|
|
|
ctx.mv.downwards_skip(cursor);
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
2019-11-06 16:38:57 +00:00
|
|
|
} else if (res == move_fail_pressure) {
|
2019-09-17 13:22:17 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (candidate_idx < ctx.last_SMEM_dep_idx)
|
|
|
|
|
ctx.last_SMEM_stall++;
|
|
|
|
|
k++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* find the first instruction depending on current or find another MEM */
|
2021-06-07 12:02:43 +02:00
|
|
|
UpwardsCursor up_cursor = ctx.mv.upwards_init(idx + 1, false);
|
2019-11-06 16:38:57 +00:00
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
bool found_dependency = false;
|
|
|
|
|
/* second, check if we have instructions after current to move up */
|
|
|
|
|
for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int)idx + window_size;
|
|
|
|
|
candidate_idx++) {
|
2021-06-07 12:02:43 +02:00
|
|
|
assert(candidate_idx == up_cursor.source_idx);
|
2019-09-17 13:22:17 +02:00
|
|
|
assert(candidate_idx < (int)block->instructions.size());
|
|
|
|
|
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
|
|
|
|
|
2025-08-20 22:47:46 +02:00
|
|
|
if (!is_reorderable(candidate.get()))
|
2019-09-17 13:22:17 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
/* check if candidate depends on current */
|
2021-06-07 12:02:43 +02:00
|
|
|
bool is_dependency = !found_dependency && !ctx.mv.upwards_check_deps(up_cursor);
|
2019-10-10 16:31:40 +02:00
|
|
|
/* no need to steal from following VMEM instructions */
|
2022-05-19 18:21:34 +01:00
|
|
|
if (is_dependency && (candidate->isVMEM() || candidate->isFlatLike()))
|
2019-10-10 16:31:40 +02:00
|
|
|
break;
|
2019-11-07 14:48:51 +00:00
|
|
|
|
|
|
|
|
if (found_dependency) {
|
2020-06-26 15:54:22 +01:00
|
|
|
HazardResult haz = perform_hazard_query(&hq, candidate.get(), true);
|
2020-02-11 14:15:32 +00:00
|
|
|
if (haz == hazard_fail_reorder_ds || haz == hazard_fail_spill ||
|
2020-02-11 16:55:39 +00:00
|
|
|
haz == hazard_fail_reorder_sendmsg || haz == hazard_fail_barrier ||
|
|
|
|
|
haz == hazard_fail_export)
|
2019-11-07 14:48:51 +00:00
|
|
|
is_dependency = true;
|
2020-02-11 14:15:32 +00:00
|
|
|
else if (haz != hazard_success)
|
2019-11-07 14:48:51 +00:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
if (is_dependency) {
|
|
|
|
|
if (!found_dependency) {
|
2021-06-07 12:02:43 +02:00
|
|
|
ctx.mv.upwards_update_insert_idx(up_cursor);
|
2022-07-21 15:45:11 +01:00
|
|
|
init_hazard_query(ctx, &hq);
|
2019-09-17 13:22:17 +02:00
|
|
|
found_dependency = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-11-07 14:48:51 +00:00
|
|
|
if (is_dependency || !found_dependency) {
|
|
|
|
|
if (found_dependency)
|
|
|
|
|
add_to_hazard_query(&hq, candidate.get());
|
|
|
|
|
else
|
|
|
|
|
k++;
|
2021-06-07 12:02:43 +02:00
|
|
|
ctx.mv.upwards_skip(up_cursor);
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
2019-10-31 17:33:35 +01:00
|
|
|
}
|
2019-11-06 16:38:57 +00:00
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
MoveResult res = ctx.mv.upwards_move(up_cursor);
|
2019-11-06 16:38:57 +00:00
|
|
|
if (res == move_fail_ssa || res == move_fail_rar) {
|
|
|
|
|
/* no need to steal from following VMEM instructions */
|
2022-05-19 18:21:34 +01:00
|
|
|
if (res == move_fail_ssa && (candidate->isVMEM() || candidate->isFlatLike()))
|
2019-10-31 17:33:35 +01:00
|
|
|
break;
|
2019-11-07 14:48:51 +00:00
|
|
|
add_to_hazard_query(&hq, candidate.get());
|
2021-06-07 12:02:43 +02:00
|
|
|
ctx.mv.upwards_skip(up_cursor);
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
2019-11-06 16:38:57 +00:00
|
|
|
} else if (res == move_fail_pressure) {
|
2019-09-17 13:22:17 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
k++;
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
ctx.last_SMEM_dep_idx = found_dependency ? up_cursor.insert_idx : 0;
|
aco/scheduler: improve scheduling heuristic
The heuristic we are currently using still stems from the GCN era
with the only adjustments being made for RDNA was to double (or triple)
the wave count.
This rewrite aims to detangle some concepts and provide more consistent results.
- wave_factor: The purpose of this value is to reflect that RDNA SIMDs can
accomodate twice as many waves as GCN SIMDs.
- reg_file_multiple: This value accounts for the larger register file of wave32
and some RDNA3 families.
- wave_minimum: Below this value, we don't sacrifice any waves. It corresponds
to a register demand of 64 VGPRs in wave64.
- occupancy_factor: Depending on target_waves and wave_factor, this controls
the scheduling window sizes and number of moves.
The main differences from the previous heuristic is a lower wave minimum and
a slightly less aggressive reduction of waves.
It also increases SMEM_MAX_MOVES in order to mitigate some of the changes
from targeting less waves.
Totals from 62777 (78.63% of 79839) affected shaders: (Navi48)
MaxWaves: 1880983 -> 1848028 (-1.75%); split: +0.01%, -1.76%
Instrs: 40904711 -> 40800797 (-0.25%); split: -0.39%, +0.14%
CodeSize: 217132208 -> 216748832 (-0.18%); split: -0.29%, +0.12%
VGPRs: 3019304 -> 3099596 (+2.66%); split: -0.11%, +2.77%
Latency: 268857129 -> 265951122 (-1.08%); split: -1.33%, +0.25%
InvThroughput: 40960938 -> 41044533 (+0.20%); split: -0.18%, +0.39%
VClause: 794000 -> 782913 (-1.40%); split: -2.24%, +0.84%
SClause: 1192476 -> 1150831 (-3.49%); split: -3.94%, +0.45%
Copies: 2720470 -> 2700148 (-0.75%); split: -1.84%, +1.09%
Branches: 785926 -> 785951 (+0.00%); split: -0.01%, +0.01%
VALU: 22918411 -> 22890189 (-0.12%); split: -0.19%, +0.06%
SALU: 5281201 -> 5289486 (+0.16%); split: -0.21%, +0.36%
VOPD: 8790 -> 8685 (-1.19%); split: +1.08%, -2.28%
Totals from 62081 (77.77% of 79825) affected shaders: (Navi31)
MaxWaves: 1848555 -> 1812347 (-1.96%); split: +0.01%, -1.97%
Instrs: 39794460 -> 39704180 (-0.23%); split: -0.39%, +0.16%
CodeSize: 208987052 -> 208621524 (-0.17%); split: -0.31%, +0.13%
VGPRs: 3046284 -> 3135156 (+2.92%); split: -0.11%, +3.03%
Latency: 268863465 -> 265218186 (-1.36%); split: -1.59%, +0.23%
InvThroughput: 41101515 -> 41167075 (+0.16%); split: -0.22%, +0.38%
VClause: 795316 -> 774899 (-2.57%); split: -3.17%, +0.61%
SClause: 1177294 -> 1135451 (-3.55%); split: -4.06%, +0.51%
Copies: 2743254 -> 2725127 (-0.66%); split: -1.90%, +1.24%
Branches: 801395 -> 801428 (+0.00%); split: -0.01%, +0.02%
VALU: 23898938 -> 23871294 (-0.12%); split: -0.20%, +0.08%
SALU: 3908807 -> 3919130 (+0.26%); split: -0.23%, +0.50%
VOPD: 8529 -> 8500 (-0.34%); split: +1.29%, -1.63%
Totals from 44996 (71.01% of 63370) affected shaders: (Vega10)
MaxWaves: 307074 -> 304808 (-0.74%); split: +0.63%, -1.37%
Instrs: 22743534 -> 22716240 (-0.12%); split: -0.22%, +0.10%
CodeSize: 117284856 -> 117173212 (-0.10%); split: -0.19%, +0.09%
SGPRs: 3249008 -> 3330480 (+2.51%); split: -0.36%, +2.87%
VGPRs: 1901400 -> 1943880 (+2.23%); split: -0.60%, +2.83%
Latency: 224839126 -> 222878477 (-0.87%); split: -1.19%, +0.31%
InvThroughput: 114389570 -> 114316559 (-0.06%); split: -0.17%, +0.11%
VClause: 482012 -> 473304 (-1.81%); split: -2.86%, +1.05%
SClause: 757799 -> 717092 (-5.37%); split: -5.64%, +0.27%
Copies: 2182735 -> 2183598 (+0.04%); split: -1.17%, +1.21%
Branches: 396026 -> 395996 (-0.01%); split: -0.03%, +0.02%
VALU: 16740283 -> 16728098 (-0.07%); split: -0.14%, +0.07%
SALU: 2133575 -> 2145863 (+0.58%); split: -0.29%, +0.86%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30720>
2024-07-24 11:51:31 +02:00
|
|
|
ctx.last_SMEM_stall = 10 - ctx.occupancy_factor - k;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
2024-06-13 11:42:11 +02:00
|
|
|
schedule_VMEM(sched_ctx& ctx, Block* block, Instruction* current, int idx)
|
2019-09-17 13:22:17 +02:00
|
|
|
{
|
|
|
|
|
assert(idx != 0);
|
|
|
|
|
int window_size = VMEM_WINDOW_SIZE;
|
|
|
|
|
int max_moves = VMEM_MAX_MOVES;
|
2019-10-18 13:05:00 +01:00
|
|
|
int clause_max_grab_dist = VMEM_CLAUSE_MAX_GRAB_DIST;
|
2021-05-19 09:23:20 +02:00
|
|
|
bool only_clauses = false;
|
2019-09-17 13:22:17 +02:00
|
|
|
int16_t k = 0;
|
|
|
|
|
|
|
|
|
|
/* first, check if we have instructions before current to move down */
|
2019-11-07 14:48:51 +00:00
|
|
|
hazard_query indep_hq;
|
|
|
|
|
hazard_query clause_hq;
|
2022-07-21 15:45:11 +01:00
|
|
|
init_hazard_query(ctx, &indep_hq);
|
|
|
|
|
init_hazard_query(ctx, &clause_hq);
|
2019-11-07 14:48:51 +00:00
|
|
|
add_to_hazard_query(&indep_hq, current);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, true);
|
2019-11-06 16:38:57 +00:00
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int)idx - window_size;
|
|
|
|
|
candidate_idx--) {
|
2021-06-07 12:02:43 +02:00
|
|
|
assert(candidate_idx == cursor.source_idx);
|
2019-09-17 13:22:17 +02:00
|
|
|
assert(candidate_idx >= 0);
|
|
|
|
|
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
2021-01-20 15:27:16 +00:00
|
|
|
bool is_vmem = candidate->isVMEM() || candidate->isFlatLike();
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2025-08-06 10:37:53 +02:00
|
|
|
/* Break when encountering another VMEM instruction, logical_start or barriers. */
|
2025-08-20 22:47:46 +02:00
|
|
|
if (!is_reorderable(candidate.get()))
|
2019-09-17 13:22:17 +02:00
|
|
|
break;
|
|
|
|
|
|
aco/scheduler: Stop downwards scheduling after encountering the first clause
Totals from 9899 (12.40% of 79839) affected shaders: (Navi48)
MaxWaves: 276355 -> 276317 (-0.01%); split: +0.01%, -0.02%
Instrs: 8781768 -> 8766504 (-0.17%); split: -0.25%, +0.07%
CodeSize: 46297556 -> 46236104 (-0.13%); split: -0.19%, +0.06%
VGPRs: 574680 -> 574800 (+0.02%); split: -0.00%, +0.03%
Latency: 54261324 -> 54357916 (+0.18%); split: -0.14%, +0.32%
InvThroughput: 9122700 -> 9121115 (-0.02%); split: -0.07%, +0.05%
VClause: 222062 -> 218499 (-1.60%); split: -2.33%, +0.73%
SClause: 167138 -> 163233 (-2.34%); split: -2.43%, +0.09%
Copies: 602395 -> 598560 (-0.64%); split: -1.21%, +0.57%
Branches: 161939 -> 161932 (-0.00%); split: -0.01%, +0.00%
VALU: 5063999 -> 5060199 (-0.08%); split: -0.14%, +0.07%
SALU: 988254 -> 988285 (+0.00%); split: -0.02%, +0.02%
VOPD: 2478 -> 2443 (-1.41%); split: +0.40%, -1.82%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36599>
2025-07-31 08:20:36 +02:00
|
|
|
if (should_form_clause(current, candidate.get())) {
|
2019-10-18 13:05:00 +01:00
|
|
|
/* We can't easily tell how much this will decrease the def-to-use
|
|
|
|
|
* distances, so just use how far it will be moved as a heuristic. */
|
aco/scheduler: Stop downwards scheduling after encountering the first clause
Totals from 9899 (12.40% of 79839) affected shaders: (Navi48)
MaxWaves: 276355 -> 276317 (-0.01%); split: +0.01%, -0.02%
Instrs: 8781768 -> 8766504 (-0.17%); split: -0.25%, +0.07%
CodeSize: 46297556 -> 46236104 (-0.13%); split: -0.19%, +0.06%
VGPRs: 574680 -> 574800 (+0.02%); split: -0.00%, +0.03%
Latency: 54261324 -> 54357916 (+0.18%); split: -0.14%, +0.32%
InvThroughput: 9122700 -> 9121115 (-0.02%); split: -0.07%, +0.05%
VClause: 222062 -> 218499 (-1.60%); split: -2.33%, +0.73%
SClause: 167138 -> 163233 (-2.34%); split: -2.43%, +0.09%
Copies: 602395 -> 598560 (-0.64%); split: -1.21%, +0.57%
Branches: 161939 -> 161932 (-0.00%); split: -0.01%, +0.00%
VALU: 5063999 -> 5060199 (-0.08%); split: -0.14%, +0.07%
SALU: 988254 -> 988285 (+0.00%); split: -0.02%, +0.02%
VOPD: 2478 -> 2443 (-1.41%); split: +0.40%, -1.82%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36599>
2025-07-31 08:20:36 +02:00
|
|
|
int grab_dist = cursor.insert_idx_clause - candidate_idx;
|
|
|
|
|
if (grab_dist >= clause_max_grab_dist + k)
|
|
|
|
|
break;
|
|
|
|
|
|
2025-08-06 10:37:53 +02:00
|
|
|
if (perform_hazard_query(&clause_hq, candidate.get(), false) == hazard_success)
|
|
|
|
|
ctx.mv.downwards_move_clause(cursor);
|
|
|
|
|
|
|
|
|
|
/* We move the entire clause at once.
|
|
|
|
|
* Break as any earlier instructions have already been checked.
|
|
|
|
|
*/
|
aco/scheduler: Stop downwards scheduling after encountering the first clause
Totals from 9899 (12.40% of 79839) affected shaders: (Navi48)
MaxWaves: 276355 -> 276317 (-0.01%); split: +0.01%, -0.02%
Instrs: 8781768 -> 8766504 (-0.17%); split: -0.25%, +0.07%
CodeSize: 46297556 -> 46236104 (-0.13%); split: -0.19%, +0.06%
VGPRs: 574680 -> 574800 (+0.02%); split: -0.00%, +0.03%
Latency: 54261324 -> 54357916 (+0.18%); split: -0.14%, +0.32%
InvThroughput: 9122700 -> 9121115 (-0.02%); split: -0.07%, +0.05%
VClause: 222062 -> 218499 (-1.60%); split: -2.33%, +0.73%
SClause: 167138 -> 163233 (-2.34%); split: -2.43%, +0.09%
Copies: 602395 -> 598560 (-0.64%); split: -1.21%, +0.57%
Branches: 161939 -> 161932 (-0.00%); split: -0.01%, +0.00%
VALU: 5063999 -> 5060199 (-0.08%); split: -0.14%, +0.07%
SALU: 988254 -> 988285 (+0.00%); split: -0.02%, +0.02%
VOPD: 2478 -> 2443 (-1.41%); split: +0.40%, -1.82%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36599>
2025-07-31 08:20:36 +02:00
|
|
|
break;
|
2019-10-18 13:05:00 +01:00
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
|
aco/scheduler: ignore potential SMEM stalls when forming clauses
Totals from 4190 (5.25% of 79839) affected shaders: (Navi48)
MaxWaves: 117020 -> 117014 (-0.01%)
Instrs: 4801892 -> 4801547 (-0.01%); split: -0.06%, +0.05%
CodeSize: 25327632 -> 25325500 (-0.01%); split: -0.05%, +0.04%
VGPRs: 236452 -> 236488 (+0.02%)
Latency: 30569070 -> 30539464 (-0.10%); split: -0.13%, +0.04%
InvThroughput: 4891650 -> 4891062 (-0.01%); split: -0.03%, +0.01%
VClause: 119615 -> 118763 (-0.71%); split: -1.02%, +0.31%
SClause: 100482 -> 100297 (-0.18%); split: -0.44%, +0.26%
Copies: 326644 -> 326756 (+0.03%); split: -0.19%, +0.22%
Branches: 98982 -> 98980 (-0.00%)
VALU: 2712397 -> 2712534 (+0.01%); split: -0.02%, +0.03%
SALU: 591836 -> 591817 (-0.00%); split: -0.00%, +0.00%
VOPD: 993 -> 987 (-0.60%); split: +0.20%, -0.81%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36599>
2025-08-05 11:11:22 +02:00
|
|
|
/* Break if we'd make the previous SMEM instruction stall. */
|
|
|
|
|
bool can_stall_prev_smem =
|
|
|
|
|
idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx;
|
2025-08-06 10:37:53 +02:00
|
|
|
if (can_stall_prev_smem && ctx.last_SMEM_stall >= 0)
|
aco/scheduler: ignore potential SMEM stalls when forming clauses
Totals from 4190 (5.25% of 79839) affected shaders: (Navi48)
MaxWaves: 117020 -> 117014 (-0.01%)
Instrs: 4801892 -> 4801547 (-0.01%); split: -0.06%, +0.05%
CodeSize: 25327632 -> 25325500 (-0.01%); split: -0.05%, +0.04%
VGPRs: 236452 -> 236488 (+0.02%)
Latency: 30569070 -> 30539464 (-0.10%); split: -0.13%, +0.04%
InvThroughput: 4891650 -> 4891062 (-0.01%); split: -0.03%, +0.01%
VClause: 119615 -> 118763 (-0.71%); split: -1.02%, +0.31%
SClause: 100482 -> 100297 (-0.18%); split: -0.44%, +0.26%
Copies: 326644 -> 326756 (+0.03%); split: -0.19%, +0.22%
Branches: 98982 -> 98980 (-0.00%)
VALU: 2712397 -> 2712534 (+0.01%); split: -0.02%, +0.03%
SALU: 591836 -> 591817 (-0.00%); split: -0.00%, +0.00%
VOPD: 993 -> 987 (-0.60%); split: +0.20%, -0.81%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36599>
2025-08-05 11:11:22 +02:00
|
|
|
break;
|
|
|
|
|
|
2025-08-06 10:37:53 +02:00
|
|
|
/* If current depends on candidate, add additional dependencies and continue. */
|
|
|
|
|
bool can_move_down = !only_clauses && (!is_vmem || candidate->definitions.empty());
|
|
|
|
|
|
|
|
|
|
HazardResult haz = perform_hazard_query(&indep_hq, candidate.get(), false);
|
2020-02-11 14:15:32 +00:00
|
|
|
if (haz == hazard_fail_reorder_ds || haz == hazard_fail_spill ||
|
2020-02-11 16:55:39 +00:00
|
|
|
haz == hazard_fail_reorder_sendmsg || haz == hazard_fail_barrier ||
|
|
|
|
|
haz == hazard_fail_export)
|
2019-09-17 13:22:17 +02:00
|
|
|
can_move_down = false;
|
2019-11-07 14:48:51 +00:00
|
|
|
else if (haz != hazard_success)
|
|
|
|
|
break;
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
if (!can_move_down) {
|
2019-11-07 14:48:51 +00:00
|
|
|
add_to_hazard_query(&indep_hq, candidate.get());
|
|
|
|
|
add_to_hazard_query(&clause_hq, candidate.get());
|
2021-06-07 12:02:43 +02:00
|
|
|
ctx.mv.downwards_skip(cursor);
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-01 16:58:01 +02:00
|
|
|
MoveResult res = ctx.mv.downwards_move(cursor);
|
|
|
|
|
if (res == move_fail_ssa || res == move_fail_rar) {
|
|
|
|
|
add_to_hazard_query(&indep_hq, candidate.get());
|
|
|
|
|
add_to_hazard_query(&clause_hq, candidate.get());
|
|
|
|
|
ctx.mv.downwards_skip(cursor);
|
|
|
|
|
continue;
|
|
|
|
|
} else if (res == move_fail_pressure) {
|
aco/scheduler: Stop downwards scheduling after encountering the first clause
Totals from 9899 (12.40% of 79839) affected shaders: (Navi48)
MaxWaves: 276355 -> 276317 (-0.01%); split: +0.01%, -0.02%
Instrs: 8781768 -> 8766504 (-0.17%); split: -0.25%, +0.07%
CodeSize: 46297556 -> 46236104 (-0.13%); split: -0.19%, +0.06%
VGPRs: 574680 -> 574800 (+0.02%); split: -0.00%, +0.03%
Latency: 54261324 -> 54357916 (+0.18%); split: -0.14%, +0.32%
InvThroughput: 9122700 -> 9121115 (-0.02%); split: -0.07%, +0.05%
VClause: 222062 -> 218499 (-1.60%); split: -2.33%, +0.73%
SClause: 167138 -> 163233 (-2.34%); split: -2.43%, +0.09%
Copies: 602395 -> 598560 (-0.64%); split: -1.21%, +0.57%
Branches: 161939 -> 161932 (-0.00%); split: -0.01%, +0.00%
VALU: 5063999 -> 5060199 (-0.08%); split: -0.14%, +0.07%
SALU: 988254 -> 988285 (+0.00%); split: -0.02%, +0.02%
VOPD: 2478 -> 2443 (-1.41%); split: +0.40%, -1.82%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36599>
2025-07-31 08:20:36 +02:00
|
|
|
only_clauses = true;
|
2025-08-01 16:58:01 +02:00
|
|
|
add_to_hazard_query(&indep_hq, candidate.get());
|
|
|
|
|
add_to_hazard_query(&clause_hq, candidate.get());
|
|
|
|
|
ctx.mv.downwards_skip(cursor);
|
|
|
|
|
continue;
|
aco/scheduler: Stop downwards scheduling after encountering the first clause
Totals from 9899 (12.40% of 79839) affected shaders: (Navi48)
MaxWaves: 276355 -> 276317 (-0.01%); split: +0.01%, -0.02%
Instrs: 8781768 -> 8766504 (-0.17%); split: -0.25%, +0.07%
CodeSize: 46297556 -> 46236104 (-0.13%); split: -0.19%, +0.06%
VGPRs: 574680 -> 574800 (+0.02%); split: -0.00%, +0.03%
Latency: 54261324 -> 54357916 (+0.18%); split: -0.14%, +0.32%
InvThroughput: 9122700 -> 9121115 (-0.02%); split: -0.07%, +0.05%
VClause: 222062 -> 218499 (-1.60%); split: -2.33%, +0.73%
SClause: 167138 -> 163233 (-2.34%); split: -2.43%, +0.09%
Copies: 602395 -> 598560 (-0.64%); split: -1.21%, +0.57%
Branches: 161939 -> 161932 (-0.00%); split: -0.01%, +0.00%
VALU: 5063999 -> 5060199 (-0.08%); split: -0.14%, +0.07%
SALU: 988254 -> 988285 (+0.00%); split: -0.02%, +0.02%
VOPD: 2478 -> 2443 (-1.41%); split: +0.40%, -1.82%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36599>
2025-07-31 08:20:36 +02:00
|
|
|
}
|
2025-08-01 16:58:01 +02:00
|
|
|
k++;
|
|
|
|
|
|
|
|
|
|
if (candidate_idx < ctx.last_SMEM_dep_idx)
|
|
|
|
|
ctx.last_SMEM_stall++;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* find the first instruction depending on current or find another VMEM */
|
2021-06-07 12:02:43 +02:00
|
|
|
UpwardsCursor up_cursor = ctx.mv.upwards_init(idx + 1, true);
|
2019-11-06 16:38:57 +00:00
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
bool found_dependency = false;
|
|
|
|
|
/* second, check if we have instructions after current to move up */
|
|
|
|
|
for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int)idx + window_size;
|
|
|
|
|
candidate_idx++) {
|
2021-06-07 12:02:43 +02:00
|
|
|
assert(candidate_idx == up_cursor.source_idx);
|
2019-09-17 13:22:17 +02:00
|
|
|
assert(candidate_idx < (int)block->instructions.size());
|
|
|
|
|
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
2021-01-20 15:27:16 +00:00
|
|
|
bool is_vmem = candidate->isVMEM() || candidate->isFlatLike();
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2025-08-20 22:47:46 +02:00
|
|
|
if (!is_reorderable(candidate.get()))
|
2019-09-17 13:22:17 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
/* check if candidate depends on current */
|
2019-10-31 17:33:35 +01:00
|
|
|
bool is_dependency = false;
|
2019-11-07 14:48:51 +00:00
|
|
|
if (found_dependency) {
|
2020-06-26 15:54:22 +01:00
|
|
|
HazardResult haz = perform_hazard_query(&indep_hq, candidate.get(), true);
|
2019-11-07 14:48:51 +00:00
|
|
|
if (haz == hazard_fail_reorder_ds || haz == hazard_fail_spill ||
|
2020-02-11 14:15:32 +00:00
|
|
|
haz == hazard_fail_reorder_vmem_smem || haz == hazard_fail_reorder_sendmsg ||
|
2020-02-11 16:55:39 +00:00
|
|
|
haz == hazard_fail_barrier || haz == hazard_fail_export)
|
2019-11-07 14:48:51 +00:00
|
|
|
is_dependency = true;
|
2020-02-11 14:15:32 +00:00
|
|
|
else if (haz != hazard_success)
|
2019-11-07 14:48:51 +00:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
is_dependency |= !found_dependency && !ctx.mv.upwards_check_deps(up_cursor);
|
2019-09-17 13:22:17 +02:00
|
|
|
if (is_dependency) {
|
|
|
|
|
if (!found_dependency) {
|
2021-06-07 12:02:43 +02:00
|
|
|
ctx.mv.upwards_update_insert_idx(up_cursor);
|
2022-07-21 15:45:11 +01:00
|
|
|
init_hazard_query(ctx, &indep_hq);
|
2019-09-17 13:22:17 +02:00
|
|
|
found_dependency = true;
|
|
|
|
|
}
|
2019-11-27 17:27:36 +00:00
|
|
|
} else if (is_vmem) {
|
2019-10-31 17:33:35 +01:00
|
|
|
/* don't move up dependencies of other VMEM instructions */
|
2019-08-28 12:08:12 +02:00
|
|
|
for (const Definition& def : candidate->definitions) {
|
|
|
|
|
if (def.isTemp())
|
2019-11-06 16:38:57 +00:00
|
|
|
ctx.mv.depends_on[def.tempId()] = true;
|
2019-08-28 12:08:12 +02:00
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
2019-11-06 16:38:57 +00:00
|
|
|
if (is_dependency || !found_dependency) {
|
2019-11-07 14:48:51 +00:00
|
|
|
if (found_dependency)
|
|
|
|
|
add_to_hazard_query(&indep_hq, candidate.get());
|
aco: fix def-use distance calculation when scheduling.
This change also increases the VMEM_MAX_MOVES
to mitigate some of the scheduling changes.
Totals from 34301 (24.61% of 139391) affected shaders:
SGPRs: 2515440 -> 2552304 (+1.47%); split: -1.25%, +2.71%
VGPRs: 1786676 -> 1794724 (+0.45%); split: -0.31%, +0.76%
CodeSize: 151079856 -> 151209828 (+0.09%); split: -0.06%, +0.15%
MaxWaves: 392454 -> 388966 (-0.89%); split: +0.39%, -1.28%
Instrs: 28870746 -> 28895907 (+0.09%); split: -0.09%, +0.17%
Cycles: 960450680 -> 961315796 (+0.09%); split: -0.09%, +0.18%
VMEM: 19027987 -> 19796223 (+4.04%); split: +7.49%, -3.45%
SMEM: 2434691 -> 2394829 (-1.64%); split: +2.80%, -4.43%
VClause: 551776 -> 543051 (-1.58%); split: -1.73%, +0.15%
SClause: 1230147 -> 1227637 (-0.20%); split: -1.40%, +1.20%
Copies: 1957640 -> 1963617 (+0.31%); split: -1.11%, +1.41%
Branches: 611747 -> 612504 (+0.12%); split: -0.11%, +0.23%
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7903>
2020-08-13 10:13:49 +02:00
|
|
|
else
|
|
|
|
|
k++;
|
2021-06-07 12:02:43 +02:00
|
|
|
ctx.mv.upwards_skip(up_cursor);
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
|
|
|
|
}
|
2019-11-06 16:38:57 +00:00
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
MoveResult res = ctx.mv.upwards_move(up_cursor);
|
2019-11-06 16:38:57 +00:00
|
|
|
if (res == move_fail_ssa || res == move_fail_rar) {
|
2019-11-07 14:48:51 +00:00
|
|
|
add_to_hazard_query(&indep_hq, candidate.get());
|
2021-06-07 12:02:43 +02:00
|
|
|
ctx.mv.upwards_skip(up_cursor);
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
2019-11-06 16:38:57 +00:00
|
|
|
} else if (res == move_fail_pressure) {
|
2019-09-17 13:22:17 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
k++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
aco: schedule LDSDIR instructions
fossil-db (navi31):
Totals from 33850 (42.63% of 79395) affected shaders:
MaxWaves: 1011236 -> 1011204 (-0.00%)
Instrs: 23589117 -> 23559185 (-0.13%); split: -0.21%, +0.08%
CodeSize: 126099716 -> 125968376 (-0.10%); split: -0.17%, +0.07%
VGPRs: 1348632 -> 1356012 (+0.55%); split: -0.09%, +0.63%
Latency: 183233795 -> 180997751 (-1.22%); split: -1.33%, +0.11%
InvThroughput: 27081576 -> 27056383 (-0.09%); split: -0.15%, +0.06%
VClause: 386453 -> 386551 (+0.03%); split: -0.11%, +0.13%
SClause: 811941 -> 813023 (+0.13%); split: -0.38%, +0.52%
Copies: 1279706 -> 1280051 (+0.03%); split: -0.46%, +0.49%
Branches: 416940 -> 416938 (-0.00%); split: -0.02%, +0.02%
VALU: 13566410 -> 13567367 (+0.01%); split: -0.04%, +0.04%
SALU: 1835804 -> 1835652 (-0.01%); split: -0.02%, +0.01%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11013
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
2024-04-15 11:42:14 +01:00
|
|
|
void
|
2024-06-13 11:42:11 +02:00
|
|
|
schedule_LDS(sched_ctx& ctx, Block* block, Instruction* current, int idx)
|
aco: schedule LDSDIR instructions
fossil-db (navi31):
Totals from 33850 (42.63% of 79395) affected shaders:
MaxWaves: 1011236 -> 1011204 (-0.00%)
Instrs: 23589117 -> 23559185 (-0.13%); split: -0.21%, +0.08%
CodeSize: 126099716 -> 125968376 (-0.10%); split: -0.17%, +0.07%
VGPRs: 1348632 -> 1356012 (+0.55%); split: -0.09%, +0.63%
Latency: 183233795 -> 180997751 (-1.22%); split: -1.33%, +0.11%
InvThroughput: 27081576 -> 27056383 (-0.09%); split: -0.15%, +0.06%
VClause: 386453 -> 386551 (+0.03%); split: -0.11%, +0.13%
SClause: 811941 -> 813023 (+0.13%); split: -0.38%, +0.52%
Copies: 1279706 -> 1280051 (+0.03%); split: -0.46%, +0.49%
Branches: 416940 -> 416938 (-0.00%); split: -0.02%, +0.02%
VALU: 13566410 -> 13567367 (+0.01%); split: -0.04%, +0.04%
SALU: 1835804 -> 1835652 (-0.01%); split: -0.02%, +0.01%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11013
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
2024-04-15 11:42:14 +01:00
|
|
|
{
|
|
|
|
|
assert(idx != 0);
|
|
|
|
|
int window_size = LDS_WINDOW_SIZE;
|
aco: schedule LDS instructions
fossil-db (navi31):
Totals from 1823 (2.30% of 79395) affected shaders:
MaxWaves: 53845 -> 53827 (-0.03%); split: +0.02%, -0.05%
Instrs: 1736317 -> 1731200 (-0.29%); split: -0.38%, +0.09%
CodeSize: 8876760 -> 8857908 (-0.21%); split: -0.29%, +0.08%
VGPRs: 91688 -> 92276 (+0.64%); split: -0.03%, +0.67%
Latency: 11743095 -> 11698872 (-0.38%); split: -0.42%, +0.04%
InvThroughput: 2070526 -> 2067440 (-0.15%); split: -0.17%, +0.02%
VClause: 39048 -> 39058 (+0.03%); split: -0.01%, +0.03%
SClause: 35371 -> 35406 (+0.10%); split: -0.02%, +0.12%
Copies: 104335 -> 104384 (+0.05%); split: -0.21%, +0.26%
Branches: 29769 -> 29794 (+0.08%); split: -0.00%, +0.09%
VALU: 970925 -> 970974 (+0.01%); split: -0.01%, +0.02%
SALU: 146222 -> 146345 (+0.08%); split: -0.01%, +0.09%
VOPD: 1119 -> 1162 (+3.84%); split: +4.29%, -0.45%
fossil-db (navi21):
Totals from 37078 (46.70% of 79395) affected shaders:
MaxWaves: 990093 -> 990025 (-0.01%)
Instrs: 21130662 -> 21182543 (+0.25%); split: -0.01%, +0.26%
CodeSize: 110205364 -> 110415032 (+0.19%); split: -0.01%, +0.20%
VGPRs: 1407168 -> 1410768 (+0.26%)
Latency: 90024839 -> 89929196 (-0.11%); split: -0.11%, +0.01%
InvThroughput: 17170356 -> 17167412 (-0.02%); split: -0.02%, +0.00%
VClause: 392830 -> 392825 (-0.00%); split: -0.01%, +0.01%
SClause: 463150 -> 463188 (+0.01%); split: -0.00%, +0.01%
Copies: 1768433 -> 1768483 (+0.00%); split: -0.02%, +0.02%
Branches: 605989 -> 606011 (+0.00%); split: -0.00%, +0.00%
VALU: 11614810 -> 11614912 (+0.00%); split: -0.00%, +0.00%
SALU: 3794531 -> 3794655 (+0.00%); split: -0.00%, +0.00%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
2024-04-18 14:55:20 +01:00
|
|
|
int max_moves = current->isLDSDIR() ? LDSDIR_MAX_MOVES : LDS_MAX_MOVES;
|
aco: schedule LDSDIR instructions
fossil-db (navi31):
Totals from 33850 (42.63% of 79395) affected shaders:
MaxWaves: 1011236 -> 1011204 (-0.00%)
Instrs: 23589117 -> 23559185 (-0.13%); split: -0.21%, +0.08%
CodeSize: 126099716 -> 125968376 (-0.10%); split: -0.17%, +0.07%
VGPRs: 1348632 -> 1356012 (+0.55%); split: -0.09%, +0.63%
Latency: 183233795 -> 180997751 (-1.22%); split: -1.33%, +0.11%
InvThroughput: 27081576 -> 27056383 (-0.09%); split: -0.15%, +0.06%
VClause: 386453 -> 386551 (+0.03%); split: -0.11%, +0.13%
SClause: 811941 -> 813023 (+0.13%); split: -0.38%, +0.52%
Copies: 1279706 -> 1280051 (+0.03%); split: -0.46%, +0.49%
Branches: 416940 -> 416938 (-0.00%); split: -0.02%, +0.02%
VALU: 13566410 -> 13567367 (+0.01%); split: -0.04%, +0.04%
SALU: 1835804 -> 1835652 (-0.01%); split: -0.02%, +0.01%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11013
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
2024-04-15 11:42:14 +01:00
|
|
|
int16_t k = 0;
|
|
|
|
|
|
|
|
|
|
/* first, check if we have instructions before current to move down */
|
|
|
|
|
hazard_query hq;
|
|
|
|
|
init_hazard_query(ctx, &hq);
|
|
|
|
|
add_to_hazard_query(&hq, current);
|
|
|
|
|
|
|
|
|
|
DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, false);
|
|
|
|
|
|
|
|
|
|
for (int i = 0; k < max_moves && i < window_size; i++) {
|
|
|
|
|
aco_ptr<Instruction>& candidate = block->instructions[cursor.source_idx];
|
|
|
|
|
bool is_mem = candidate->isVMEM() || candidate->isFlatLike() || candidate->isSMEM();
|
2025-08-20 22:47:46 +02:00
|
|
|
if (!is_reorderable(candidate.get()) || is_mem)
|
aco: schedule LDSDIR instructions
fossil-db (navi31):
Totals from 33850 (42.63% of 79395) affected shaders:
MaxWaves: 1011236 -> 1011204 (-0.00%)
Instrs: 23589117 -> 23559185 (-0.13%); split: -0.21%, +0.08%
CodeSize: 126099716 -> 125968376 (-0.10%); split: -0.17%, +0.07%
VGPRs: 1348632 -> 1356012 (+0.55%); split: -0.09%, +0.63%
Latency: 183233795 -> 180997751 (-1.22%); split: -1.33%, +0.11%
InvThroughput: 27081576 -> 27056383 (-0.09%); split: -0.15%, +0.06%
VClause: 386453 -> 386551 (+0.03%); split: -0.11%, +0.13%
SClause: 811941 -> 813023 (+0.13%); split: -0.38%, +0.52%
Copies: 1279706 -> 1280051 (+0.03%); split: -0.46%, +0.49%
Branches: 416940 -> 416938 (-0.00%); split: -0.02%, +0.02%
VALU: 13566410 -> 13567367 (+0.01%); split: -0.04%, +0.04%
SALU: 1835804 -> 1835652 (-0.01%); split: -0.02%, +0.01%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11013
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
2024-04-15 11:42:14 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
if (candidate->isDS() || candidate->isLDSDIR()) {
|
|
|
|
|
add_to_hazard_query(&hq, candidate.get());
|
|
|
|
|
ctx.mv.downwards_skip(cursor);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (perform_hazard_query(&hq, candidate.get(), false) != hazard_success ||
|
2025-08-01 11:11:41 +02:00
|
|
|
ctx.mv.downwards_move(cursor) != move_success)
|
aco: schedule LDSDIR instructions
fossil-db (navi31):
Totals from 33850 (42.63% of 79395) affected shaders:
MaxWaves: 1011236 -> 1011204 (-0.00%)
Instrs: 23589117 -> 23559185 (-0.13%); split: -0.21%, +0.08%
CodeSize: 126099716 -> 125968376 (-0.10%); split: -0.17%, +0.07%
VGPRs: 1348632 -> 1356012 (+0.55%); split: -0.09%, +0.63%
Latency: 183233795 -> 180997751 (-1.22%); split: -1.33%, +0.11%
InvThroughput: 27081576 -> 27056383 (-0.09%); split: -0.15%, +0.06%
VClause: 386453 -> 386551 (+0.03%); split: -0.11%, +0.13%
SClause: 811941 -> 813023 (+0.13%); split: -0.38%, +0.52%
Copies: 1279706 -> 1280051 (+0.03%); split: -0.46%, +0.49%
Branches: 416940 -> 416938 (-0.00%); split: -0.02%, +0.02%
VALU: 13566410 -> 13567367 (+0.01%); split: -0.04%, +0.04%
SALU: 1835804 -> 1835652 (-0.01%); split: -0.02%, +0.01%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11013
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
2024-04-15 11:42:14 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
k++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* second, check if we have instructions after current to move up */
|
|
|
|
|
bool found_dependency = false;
|
|
|
|
|
int i = 0;
|
|
|
|
|
UpwardsCursor up_cursor = ctx.mv.upwards_init(idx + 1, true);
|
|
|
|
|
/* find the first instruction depending on current */
|
|
|
|
|
for (; k < max_moves && i < window_size; i++) {
|
|
|
|
|
aco_ptr<Instruction>& candidate = block->instructions[up_cursor.source_idx];
|
|
|
|
|
bool is_mem = candidate->isVMEM() || candidate->isFlatLike() || candidate->isSMEM();
|
2025-08-20 22:47:46 +02:00
|
|
|
if (!is_reorderable(candidate.get()) || is_mem)
|
aco: schedule LDSDIR instructions
fossil-db (navi31):
Totals from 33850 (42.63% of 79395) affected shaders:
MaxWaves: 1011236 -> 1011204 (-0.00%)
Instrs: 23589117 -> 23559185 (-0.13%); split: -0.21%, +0.08%
CodeSize: 126099716 -> 125968376 (-0.10%); split: -0.17%, +0.07%
VGPRs: 1348632 -> 1356012 (+0.55%); split: -0.09%, +0.63%
Latency: 183233795 -> 180997751 (-1.22%); split: -1.33%, +0.11%
InvThroughput: 27081576 -> 27056383 (-0.09%); split: -0.15%, +0.06%
VClause: 386453 -> 386551 (+0.03%); split: -0.11%, +0.13%
SClause: 811941 -> 813023 (+0.13%); split: -0.38%, +0.52%
Copies: 1279706 -> 1280051 (+0.03%); split: -0.46%, +0.49%
Branches: 416940 -> 416938 (-0.00%); split: -0.02%, +0.02%
VALU: 13566410 -> 13567367 (+0.01%); split: -0.04%, +0.04%
SALU: 1835804 -> 1835652 (-0.01%); split: -0.02%, +0.01%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11013
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
2024-04-15 11:42:14 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
/* check if candidate depends on current */
|
|
|
|
|
if (!ctx.mv.upwards_check_deps(up_cursor)) {
|
|
|
|
|
init_hazard_query(ctx, &hq);
|
|
|
|
|
add_to_hazard_query(&hq, candidate.get());
|
|
|
|
|
ctx.mv.upwards_update_insert_idx(up_cursor);
|
|
|
|
|
ctx.mv.upwards_skip(up_cursor);
|
|
|
|
|
found_dependency = true;
|
|
|
|
|
i++;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ctx.mv.upwards_skip(up_cursor);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (; found_dependency && k < max_moves && i < window_size; i++) {
|
|
|
|
|
aco_ptr<Instruction>& candidate = block->instructions[up_cursor.source_idx];
|
|
|
|
|
bool is_mem = candidate->isVMEM() || candidate->isFlatLike() || candidate->isSMEM();
|
2025-08-20 22:47:46 +02:00
|
|
|
if (!is_reorderable(candidate.get()) || is_mem)
|
aco: schedule LDSDIR instructions
fossil-db (navi31):
Totals from 33850 (42.63% of 79395) affected shaders:
MaxWaves: 1011236 -> 1011204 (-0.00%)
Instrs: 23589117 -> 23559185 (-0.13%); split: -0.21%, +0.08%
CodeSize: 126099716 -> 125968376 (-0.10%); split: -0.17%, +0.07%
VGPRs: 1348632 -> 1356012 (+0.55%); split: -0.09%, +0.63%
Latency: 183233795 -> 180997751 (-1.22%); split: -1.33%, +0.11%
InvThroughput: 27081576 -> 27056383 (-0.09%); split: -0.15%, +0.06%
VClause: 386453 -> 386551 (+0.03%); split: -0.11%, +0.13%
SClause: 811941 -> 813023 (+0.13%); split: -0.38%, +0.52%
Copies: 1279706 -> 1280051 (+0.03%); split: -0.46%, +0.49%
Branches: 416940 -> 416938 (-0.00%); split: -0.02%, +0.02%
VALU: 13566410 -> 13567367 (+0.01%); split: -0.04%, +0.04%
SALU: 1835804 -> 1835652 (-0.01%); split: -0.02%, +0.01%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11013
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
2024-04-15 11:42:14 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
HazardResult haz = perform_hazard_query(&hq, candidate.get(), true);
|
|
|
|
|
if (haz == hazard_fail_exec || haz == hazard_fail_unreorderable)
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
if (haz != hazard_success || ctx.mv.upwards_move(up_cursor) != move_success) {
|
|
|
|
|
add_to_hazard_query(&hq, candidate.get());
|
|
|
|
|
ctx.mv.upwards_skip(up_cursor);
|
|
|
|
|
} else {
|
|
|
|
|
k++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
void
|
2024-06-13 11:42:11 +02:00
|
|
|
schedule_position_export(sched_ctx& ctx, Block* block, Instruction* current, int idx)
|
2019-09-17 13:22:17 +02:00
|
|
|
{
|
|
|
|
|
assert(idx != 0);
|
2021-07-05 15:26:18 +02:00
|
|
|
int window_size = POS_EXP_WINDOW_SIZE / ctx.schedule_pos_export_div;
|
|
|
|
|
int max_moves = POS_EXP_MAX_MOVES / ctx.schedule_pos_export_div;
|
2019-09-17 13:22:17 +02:00
|
|
|
int16_t k = 0;
|
|
|
|
|
|
2021-06-07 12:02:43 +02:00
|
|
|
DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, false);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2019-11-07 14:48:51 +00:00
|
|
|
hazard_query hq;
|
2022-07-21 15:45:11 +01:00
|
|
|
init_hazard_query(ctx, &hq);
|
2019-11-07 14:48:51 +00:00
|
|
|
add_to_hazard_query(&hq, current);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int)idx - window_size;
|
|
|
|
|
candidate_idx--) {
|
|
|
|
|
assert(candidate_idx >= 0);
|
|
|
|
|
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
|
|
|
|
|
2025-08-20 22:47:46 +02:00
|
|
|
if (!is_reorderable(candidate.get()))
|
2019-09-17 13:22:17 +02:00
|
|
|
break;
|
2021-01-20 15:27:16 +00:00
|
|
|
if (candidate->isVMEM() || candidate->isSMEM() || candidate->isFlatLike())
|
2019-09-17 13:22:17 +02:00
|
|
|
break;
|
|
|
|
|
|
2020-06-26 15:54:22 +01:00
|
|
|
HazardResult haz = perform_hazard_query(&hq, candidate.get(), false);
|
2020-04-01 15:38:43 +02:00
|
|
|
if (haz == hazard_fail_exec || haz == hazard_fail_unreorderable)
|
2019-09-17 13:22:17 +02:00
|
|
|
break;
|
|
|
|
|
|
2019-11-07 14:48:51 +00:00
|
|
|
if (haz != hazard_success) {
|
|
|
|
|
add_to_hazard_query(&hq, candidate.get());
|
2021-06-07 12:02:43 +02:00
|
|
|
ctx.mv.downwards_skip(cursor);
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-01 11:11:41 +02:00
|
|
|
MoveResult res = ctx.mv.downwards_move(cursor);
|
2019-11-06 16:38:57 +00:00
|
|
|
if (res == move_fail_ssa || res == move_fail_rar) {
|
2019-11-07 14:48:51 +00:00
|
|
|
add_to_hazard_query(&hq, candidate.get());
|
2021-06-07 12:02:43 +02:00
|
|
|
ctx.mv.downwards_skip(cursor);
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
2019-11-06 16:38:57 +00:00
|
|
|
} else if (res == move_fail_pressure) {
|
2019-09-17 13:22:17 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
k++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
aco/scheduler: schedule VMEM store clauses during the regular forward pass
Totals from 1456 (1.82% of 79839) affected shaders: (Navi48)
MaxWaves: 37780 -> 37128 (-1.73%); split: +0.15%, -1.87%
Instrs: 3788175 -> 3788435 (+0.01%); split: -0.04%, +0.04%
CodeSize: 20468648 -> 20467432 (-0.01%); split: -0.04%, +0.03%
VGPRs: 86820 -> 91440 (+5.32%); split: -0.10%, +5.42%
Latency: 26866232 -> 26858867 (-0.03%); split: -0.04%, +0.01%
InvThroughput: 3491741 -> 3828339 (+9.64%); split: -0.02%, +9.66%
VClause: 90413 -> 89426 (-1.09%); split: -1.27%, +0.18%
SClause: 130532 -> 130530 (-0.00%); split: -0.00%, +0.00%
Copies: 347397 -> 347806 (+0.12%); split: -0.11%, +0.23%
Branches: 117476 -> 117496 (+0.02%)
VALU: 1897427 -> 1897830 (+0.02%); split: -0.02%, +0.04%
SALU: 602365 -> 602379 (+0.00%)
VOPD: 1259 -> 1251 (-0.64%); split: +0.24%, -0.87%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36599>
2025-08-05 15:37:10 +02:00
|
|
|
void
|
2024-06-13 11:42:11 +02:00
|
|
|
schedule_VMEM_store(sched_ctx& ctx, Block* block, Instruction* current, int idx)
|
2023-06-07 17:08:10 +01:00
|
|
|
{
|
aco/scheduler: schedule VMEM store clauses during the regular forward pass
Totals from 1456 (1.82% of 79839) affected shaders: (Navi48)
MaxWaves: 37780 -> 37128 (-1.73%); split: +0.15%, -1.87%
Instrs: 3788175 -> 3788435 (+0.01%); split: -0.04%, +0.04%
CodeSize: 20468648 -> 20467432 (-0.01%); split: -0.04%, +0.03%
VGPRs: 86820 -> 91440 (+5.32%); split: -0.10%, +5.42%
Latency: 26866232 -> 26858867 (-0.03%); split: -0.04%, +0.01%
InvThroughput: 3491741 -> 3828339 (+9.64%); split: -0.02%, +9.66%
VClause: 90413 -> 89426 (-1.09%); split: -1.27%, +0.18%
SClause: 130532 -> 130530 (-0.00%); split: -0.00%, +0.00%
Copies: 347397 -> 347806 (+0.12%); split: -0.11%, +0.23%
Branches: 117476 -> 117496 (+0.02%)
VALU: 1897427 -> 1897830 (+0.02%); split: -0.02%, +0.04%
SALU: 602365 -> 602379 (+0.00%)
VOPD: 1259 -> 1251 (-0.64%); split: +0.24%, -0.87%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36599>
2025-08-05 15:37:10 +02:00
|
|
|
int max_distance = ctx.last_VMEM_store_idx + VMEM_STORE_CLAUSE_MAX_GRAB_DIST;
|
|
|
|
|
ctx.last_VMEM_store_idx = idx;
|
|
|
|
|
|
|
|
|
|
if (max_distance < idx)
|
|
|
|
|
return;
|
|
|
|
|
|
2023-06-07 17:08:10 +01:00
|
|
|
hazard_query hq;
|
|
|
|
|
init_hazard_query(ctx, &hq);
|
|
|
|
|
|
|
|
|
|
DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, true);
|
|
|
|
|
|
2024-04-26 11:52:13 +01:00
|
|
|
for (int16_t k = 0; k < VMEM_STORE_CLAUSE_MAX_GRAB_DIST;) {
|
2023-06-07 17:08:10 +01:00
|
|
|
aco_ptr<Instruction>& candidate = block->instructions[cursor.source_idx];
|
2025-08-20 22:47:46 +02:00
|
|
|
if (!is_reorderable(candidate.get()))
|
2023-06-07 17:08:10 +01:00
|
|
|
break;
|
|
|
|
|
|
aco/scheduler: schedule VMEM store clauses during the regular forward pass
Totals from 1456 (1.82% of 79839) affected shaders: (Navi48)
MaxWaves: 37780 -> 37128 (-1.73%); split: +0.15%, -1.87%
Instrs: 3788175 -> 3788435 (+0.01%); split: -0.04%, +0.04%
CodeSize: 20468648 -> 20467432 (-0.01%); split: -0.04%, +0.03%
VGPRs: 86820 -> 91440 (+5.32%); split: -0.10%, +5.42%
Latency: 26866232 -> 26858867 (-0.03%); split: -0.04%, +0.01%
InvThroughput: 3491741 -> 3828339 (+9.64%); split: -0.02%, +9.66%
VClause: 90413 -> 89426 (-1.09%); split: -1.27%, +0.18%
SClause: 130532 -> 130530 (-0.00%); split: -0.00%, +0.00%
Copies: 347397 -> 347806 (+0.12%); split: -0.11%, +0.23%
Branches: 117476 -> 117496 (+0.02%)
VALU: 1897427 -> 1897830 (+0.02%); split: -0.02%, +0.04%
SALU: 602365 -> 602379 (+0.00%)
VOPD: 1259 -> 1251 (-0.64%); split: +0.24%, -0.87%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36599>
2025-08-05 15:37:10 +02:00
|
|
|
if (should_form_clause(current, candidate.get())) {
|
|
|
|
|
if (perform_hazard_query(&hq, candidate.get(), false) == hazard_success)
|
|
|
|
|
ctx.mv.downwards_move_clause(cursor);
|
|
|
|
|
break;
|
2023-06-07 17:08:10 +01:00
|
|
|
}
|
|
|
|
|
|
aco/scheduler: schedule VMEM store clauses during the regular forward pass
Totals from 1456 (1.82% of 79839) affected shaders: (Navi48)
MaxWaves: 37780 -> 37128 (-1.73%); split: +0.15%, -1.87%
Instrs: 3788175 -> 3788435 (+0.01%); split: -0.04%, +0.04%
CodeSize: 20468648 -> 20467432 (-0.01%); split: -0.04%, +0.03%
VGPRs: 86820 -> 91440 (+5.32%); split: -0.10%, +5.42%
Latency: 26866232 -> 26858867 (-0.03%); split: -0.04%, +0.01%
InvThroughput: 3491741 -> 3828339 (+9.64%); split: -0.02%, +9.66%
VClause: 90413 -> 89426 (-1.09%); split: -1.27%, +0.18%
SClause: 130532 -> 130530 (-0.00%); split: -0.00%, +0.00%
Copies: 347397 -> 347806 (+0.12%); split: -0.11%, +0.23%
Branches: 117476 -> 117496 (+0.02%)
VALU: 1897427 -> 1897830 (+0.02%); split: -0.02%, +0.04%
SALU: 602365 -> 602379 (+0.00%)
VOPD: 1259 -> 1251 (-0.64%); split: +0.24%, -0.87%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36599>
2025-08-05 15:37:10 +02:00
|
|
|
if (candidate->isVMEM() || candidate->isFlatLike())
|
2023-06-07 17:08:10 +01:00
|
|
|
break;
|
|
|
|
|
|
aco/scheduler: schedule VMEM store clauses during the regular forward pass
Totals from 1456 (1.82% of 79839) affected shaders: (Navi48)
MaxWaves: 37780 -> 37128 (-1.73%); split: +0.15%, -1.87%
Instrs: 3788175 -> 3788435 (+0.01%); split: -0.04%, +0.04%
CodeSize: 20468648 -> 20467432 (-0.01%); split: -0.04%, +0.03%
VGPRs: 86820 -> 91440 (+5.32%); split: -0.10%, +5.42%
Latency: 26866232 -> 26858867 (-0.03%); split: -0.04%, +0.01%
InvThroughput: 3491741 -> 3828339 (+9.64%); split: -0.02%, +9.66%
VClause: 90413 -> 89426 (-1.09%); split: -1.27%, +0.18%
SClause: 130532 -> 130530 (-0.00%); split: -0.00%, +0.00%
Copies: 347397 -> 347806 (+0.12%); split: -0.11%, +0.23%
Branches: 117476 -> 117496 (+0.02%)
VALU: 1897427 -> 1897830 (+0.02%); split: -0.02%, +0.04%
SALU: 602365 -> 602379 (+0.00%)
VOPD: 1259 -> 1251 (-0.64%); split: +0.24%, -0.87%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36599>
2025-08-05 15:37:10 +02:00
|
|
|
add_to_hazard_query(&hq, candidate.get());
|
|
|
|
|
ctx.mv.downwards_skip(cursor);
|
|
|
|
|
k += get_likely_cost(candidate.get());
|
|
|
|
|
}
|
2023-06-07 17:08:10 +01:00
|
|
|
}
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
void
|
2024-06-13 11:55:27 +02:00
|
|
|
schedule_block(sched_ctx& ctx, Program* program, Block* block)
|
2019-09-17 13:22:17 +02:00
|
|
|
{
|
|
|
|
|
ctx.last_SMEM_dep_idx = 0;
|
2025-08-28 10:39:35 +01:00
|
|
|
ctx.last_VMEM_store_idx = INT_MIN;
|
2019-09-17 13:22:17 +02:00
|
|
|
ctx.last_SMEM_stall = INT16_MIN;
|
2019-11-06 16:38:57 +00:00
|
|
|
ctx.mv.block = block;
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
/* go through all instructions and find memory loads */
|
|
|
|
|
for (unsigned idx = 0; idx < block->instructions.size(); idx++) {
|
|
|
|
|
Instruction* current = block->instructions[idx].get();
|
|
|
|
|
|
2024-01-17 17:01:58 +01:00
|
|
|
if (current->opcode == aco_opcode::p_logical_end)
|
|
|
|
|
break;
|
|
|
|
|
|
2021-07-05 15:26:18 +02:00
|
|
|
if (block->kind & block_kind_export_end && current->isEXP() && ctx.schedule_pos_exports) {
|
2021-01-21 16:13:34 +00:00
|
|
|
unsigned target = current->exp().dest;
|
2020-08-13 16:54:57 +02:00
|
|
|
if (target >= V_008DFC_SQ_EXP_POS && target < V_008DFC_SQ_EXP_PRIM) {
|
|
|
|
|
ctx.mv.current = current;
|
2024-06-13 11:42:11 +02:00
|
|
|
schedule_position_export(ctx, block, current, idx);
|
2020-08-13 16:54:57 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-07 17:08:10 +01:00
|
|
|
if (current->definitions.empty()) {
|
aco/scheduler: schedule VMEM store clauses during the regular forward pass
Totals from 1456 (1.82% of 79839) affected shaders: (Navi48)
MaxWaves: 37780 -> 37128 (-1.73%); split: +0.15%, -1.87%
Instrs: 3788175 -> 3788435 (+0.01%); split: -0.04%, +0.04%
CodeSize: 20468648 -> 20467432 (-0.01%); split: -0.04%, +0.03%
VGPRs: 86820 -> 91440 (+5.32%); split: -0.10%, +5.42%
Latency: 26866232 -> 26858867 (-0.03%); split: -0.04%, +0.01%
InvThroughput: 3491741 -> 3828339 (+9.64%); split: -0.02%, +9.66%
VClause: 90413 -> 89426 (-1.09%); split: -1.27%, +0.18%
SClause: 130532 -> 130530 (-0.00%); split: -0.00%, +0.00%
Copies: 347397 -> 347806 (+0.12%); split: -0.11%, +0.23%
Branches: 117476 -> 117496 (+0.02%)
VALU: 1897427 -> 1897830 (+0.02%); split: -0.02%, +0.04%
SALU: 602365 -> 602379 (+0.00%)
VOPD: 1259 -> 1251 (-0.64%); split: +0.24%, -0.87%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36599>
2025-08-05 15:37:10 +02:00
|
|
|
if ((current->isVMEM() || current->isFlatLike()) && program->gfx_level >= GFX11) {
|
|
|
|
|
ctx.mv.current = current;
|
|
|
|
|
schedule_VMEM_store(ctx, block, current, idx);
|
|
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
2023-06-07 17:08:10 +01:00
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2021-01-20 15:27:16 +00:00
|
|
|
if (current->isVMEM() || current->isFlatLike()) {
|
2019-11-06 16:38:57 +00:00
|
|
|
ctx.mv.current = current;
|
2024-06-13 11:42:11 +02:00
|
|
|
schedule_VMEM(ctx, block, current, idx);
|
2019-11-06 16:38:57 +00:00
|
|
|
}
|
|
|
|
|
|
2021-01-20 15:27:16 +00:00
|
|
|
if (current->isSMEM()) {
|
2019-11-06 16:38:57 +00:00
|
|
|
ctx.mv.current = current;
|
2024-06-13 11:42:11 +02:00
|
|
|
schedule_SMEM(ctx, block, current, idx);
|
2019-11-06 16:38:57 +00:00
|
|
|
}
|
aco: schedule LDSDIR instructions
fossil-db (navi31):
Totals from 33850 (42.63% of 79395) affected shaders:
MaxWaves: 1011236 -> 1011204 (-0.00%)
Instrs: 23589117 -> 23559185 (-0.13%); split: -0.21%, +0.08%
CodeSize: 126099716 -> 125968376 (-0.10%); split: -0.17%, +0.07%
VGPRs: 1348632 -> 1356012 (+0.55%); split: -0.09%, +0.63%
Latency: 183233795 -> 180997751 (-1.22%); split: -1.33%, +0.11%
InvThroughput: 27081576 -> 27056383 (-0.09%); split: -0.15%, +0.06%
VClause: 386453 -> 386551 (+0.03%); split: -0.11%, +0.13%
SClause: 811941 -> 813023 (+0.13%); split: -0.38%, +0.52%
Copies: 1279706 -> 1280051 (+0.03%); split: -0.46%, +0.49%
Branches: 416940 -> 416938 (-0.00%); split: -0.02%, +0.02%
VALU: 13566410 -> 13567367 (+0.01%); split: -0.04%, +0.04%
SALU: 1835804 -> 1835652 (-0.01%); split: -0.02%, +0.01%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11013
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
2024-04-15 11:42:14 +01:00
|
|
|
|
aco: schedule LDS instructions
fossil-db (navi31):
Totals from 1823 (2.30% of 79395) affected shaders:
MaxWaves: 53845 -> 53827 (-0.03%); split: +0.02%, -0.05%
Instrs: 1736317 -> 1731200 (-0.29%); split: -0.38%, +0.09%
CodeSize: 8876760 -> 8857908 (-0.21%); split: -0.29%, +0.08%
VGPRs: 91688 -> 92276 (+0.64%); split: -0.03%, +0.67%
Latency: 11743095 -> 11698872 (-0.38%); split: -0.42%, +0.04%
InvThroughput: 2070526 -> 2067440 (-0.15%); split: -0.17%, +0.02%
VClause: 39048 -> 39058 (+0.03%); split: -0.01%, +0.03%
SClause: 35371 -> 35406 (+0.10%); split: -0.02%, +0.12%
Copies: 104335 -> 104384 (+0.05%); split: -0.21%, +0.26%
Branches: 29769 -> 29794 (+0.08%); split: -0.00%, +0.09%
VALU: 970925 -> 970974 (+0.01%); split: -0.01%, +0.02%
SALU: 146222 -> 146345 (+0.08%); split: -0.01%, +0.09%
VOPD: 1119 -> 1162 (+3.84%); split: +4.29%, -0.45%
fossil-db (navi21):
Totals from 37078 (46.70% of 79395) affected shaders:
MaxWaves: 990093 -> 990025 (-0.01%)
Instrs: 21130662 -> 21182543 (+0.25%); split: -0.01%, +0.26%
CodeSize: 110205364 -> 110415032 (+0.19%); split: -0.01%, +0.20%
VGPRs: 1407168 -> 1410768 (+0.26%)
Latency: 90024839 -> 89929196 (-0.11%); split: -0.11%, +0.01%
InvThroughput: 17170356 -> 17167412 (-0.02%); split: -0.02%, +0.00%
VClause: 392830 -> 392825 (-0.00%); split: -0.01%, +0.01%
SClause: 463150 -> 463188 (+0.01%); split: -0.00%, +0.01%
Copies: 1768433 -> 1768483 (+0.00%); split: -0.02%, +0.02%
Branches: 605989 -> 606011 (+0.00%); split: -0.00%, +0.00%
VALU: 11614810 -> 11614912 (+0.00%); split: -0.00%, +0.00%
SALU: 3794531 -> 3794655 (+0.00%); split: -0.00%, +0.00%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
2024-04-18 14:55:20 +01:00
|
|
|
if (current->isLDSDIR() || (current->isDS() && !current->ds().gds)) {
|
aco: schedule LDSDIR instructions
fossil-db (navi31):
Totals from 33850 (42.63% of 79395) affected shaders:
MaxWaves: 1011236 -> 1011204 (-0.00%)
Instrs: 23589117 -> 23559185 (-0.13%); split: -0.21%, +0.08%
CodeSize: 126099716 -> 125968376 (-0.10%); split: -0.17%, +0.07%
VGPRs: 1348632 -> 1356012 (+0.55%); split: -0.09%, +0.63%
Latency: 183233795 -> 180997751 (-1.22%); split: -1.33%, +0.11%
InvThroughput: 27081576 -> 27056383 (-0.09%); split: -0.15%, +0.06%
VClause: 386453 -> 386551 (+0.03%); split: -0.11%, +0.13%
SClause: 811941 -> 813023 (+0.13%); split: -0.38%, +0.52%
Copies: 1279706 -> 1280051 (+0.03%); split: -0.46%, +0.49%
Branches: 416940 -> 416938 (-0.00%); split: -0.02%, +0.02%
VALU: 13566410 -> 13567367 (+0.01%); split: -0.04%, +0.04%
SALU: 1835804 -> 1835652 (-0.01%); split: -0.02%, +0.01%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11013
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
2024-04-15 11:42:14 +01:00
|
|
|
ctx.mv.current = current;
|
2024-06-13 11:42:11 +02:00
|
|
|
schedule_LDS(ctx, block, current, idx);
|
aco: schedule LDSDIR instructions
fossil-db (navi31):
Totals from 33850 (42.63% of 79395) affected shaders:
MaxWaves: 1011236 -> 1011204 (-0.00%)
Instrs: 23589117 -> 23559185 (-0.13%); split: -0.21%, +0.08%
CodeSize: 126099716 -> 125968376 (-0.10%); split: -0.17%, +0.07%
VGPRs: 1348632 -> 1356012 (+0.55%); split: -0.09%, +0.63%
Latency: 183233795 -> 180997751 (-1.22%); split: -1.33%, +0.11%
InvThroughput: 27081576 -> 27056383 (-0.09%); split: -0.15%, +0.06%
VClause: 386453 -> 386551 (+0.03%); split: -0.11%, +0.13%
SClause: 811941 -> 813023 (+0.13%); split: -0.38%, +0.52%
Copies: 1279706 -> 1280051 (+0.03%); split: -0.46%, +0.49%
Branches: 416940 -> 416938 (-0.00%); split: -0.02%, +0.02%
VALU: 13566410 -> 13567367 (+0.01%); split: -0.04%, +0.04%
SALU: 1835804 -> 1835652 (-0.01%); split: -0.02%, +0.01%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11013
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
2024-04-15 11:42:14 +01:00
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* resummarize the block's register demand */
|
2024-06-26 12:07:12 +02:00
|
|
|
block->register_demand = block->live_in_demand;
|
2024-06-21 15:45:22 +02:00
|
|
|
for (const aco_ptr<Instruction>& instr : block->instructions)
|
|
|
|
|
block->register_demand.update(instr->register_demand);
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
2024-06-15 16:17:29 +02:00
|
|
|
} /* end namespace */
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
void
|
2024-06-13 11:55:27 +02:00
|
|
|
schedule_program(Program* program)
|
2019-09-17 13:22:17 +02:00
|
|
|
{
|
aco: fix max_waves_per_simd on Polaris, VegaM and GFX10.3
fossil-db (Polaris):
Totals from 20263 (14.75% of 137414) affected shaders:
SGPRs: 871407 -> 871679 (+0.03%); split: -0.00%, +0.03%
VGPRs: 513828 -> 550028 (+7.05%); split: -1.68%, +8.72%
CodeSize: 18869680 -> 18828148 (-0.22%); split: -0.23%, +0.01%
MaxWaves: 162012 -> 162030 (+0.01%); split: +0.01%, -0.00%
Instrs: 3629172 -> 3618817 (-0.29%); split: -0.30%, +0.02%
Cycles: 15682244 -> 15638244 (-0.28%); split: -0.30%, +0.02%
VMEM: 10675942 -> 10673344 (-0.02%); split: +0.18%, -0.21%
SMEM: 1209717 -> 1206088 (-0.30%); split: +0.03%, -0.33%
VClause: 81780 -> 81227 (-0.68%); split: -0.73%, +0.06%
SClause: 231724 -> 231561 (-0.07%); split: -0.07%, +0.00%
Copies: 187126 -> 180831 (-3.36%); split: -3.62%, +0.26%
Branches: 26841 -> 26837 (-0.01%); split: -0.03%, +0.01%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5546>
2020-06-18 14:30:51 +01:00
|
|
|
/* don't use program->max_reg_demand because that is affected by max_waves_per_simd */
|
|
|
|
|
RegisterDemand demand;
|
|
|
|
|
for (Block& block : program->blocks)
|
|
|
|
|
demand.update(block.register_demand);
|
2021-02-05 14:38:08 +01:00
|
|
|
demand.vgpr += program->config->num_shared_vgprs / 2;
|
aco: fix max_waves_per_simd on Polaris, VegaM and GFX10.3
fossil-db (Polaris):
Totals from 20263 (14.75% of 137414) affected shaders:
SGPRs: 871407 -> 871679 (+0.03%); split: -0.00%, +0.03%
VGPRs: 513828 -> 550028 (+7.05%); split: -1.68%, +8.72%
CodeSize: 18869680 -> 18828148 (-0.22%); split: -0.23%, +0.01%
MaxWaves: 162012 -> 162030 (+0.01%); split: +0.01%, -0.00%
Instrs: 3629172 -> 3618817 (-0.29%); split: -0.30%, +0.02%
Cycles: 15682244 -> 15638244 (-0.28%); split: -0.30%, +0.02%
VMEM: 10675942 -> 10673344 (-0.02%); split: +0.18%, -0.21%
SMEM: 1209717 -> 1206088 (-0.30%); split: +0.03%, -0.33%
VClause: 81780 -> 81227 (-0.68%); split: -0.73%, +0.06%
SClause: 231724 -> 231561 (-0.07%); split: -0.07%, +0.00%
Copies: 187126 -> 180831 (-3.36%); split: -3.62%, +0.26%
Branches: 26841 -> 26837 (-0.01%); split: -0.03%, +0.01%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5546>
2020-06-18 14:30:51 +01:00
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
sched_ctx ctx;
|
2022-07-21 15:45:11 +01:00
|
|
|
ctx.gfx_level = program->gfx_level;
|
2025-09-09 10:03:59 +01:00
|
|
|
ctx.program = program;
|
2019-11-06 16:38:57 +00:00
|
|
|
ctx.mv.depends_on.resize(program->peekAllocationId());
|
|
|
|
|
ctx.mv.RAR_dependencies.resize(program->peekAllocationId());
|
|
|
|
|
ctx.mv.RAR_dependencies_clause.resize(program->peekAllocationId());
|
aco/scheduler: improve scheduling heuristic
The heuristic we are currently using still stems from the GCN era,
with the only adjustment made for RDNA being to double (or triple)
the wave count.
This rewrite aims to detangle some concepts and provide more consistent results.
- wave_factor: The purpose of this value is to reflect that RDNA SIMDs can
accommodate twice as many waves as GCN SIMDs.
- reg_file_multiple: This value accounts for the larger register file of wave32
and some RDNA3 families.
- wave_minimum: Below this value, we don't sacrifice any waves. It corresponds
to a register demand of 64 VGPRs in wave64.
- occupancy_factor: Depending on target_waves and wave_factor, this controls
the scheduling window sizes and number of moves.
The main differences from the previous heuristic are a lower wave minimum and
a slightly less aggressive reduction of waves.
It also increases SMEM_MAX_MOVES in order to mitigate some of the changes
from targeting fewer waves.
Totals from 62777 (78.63% of 79839) affected shaders: (Navi48)
MaxWaves: 1880983 -> 1848028 (-1.75%); split: +0.01%, -1.76%
Instrs: 40904711 -> 40800797 (-0.25%); split: -0.39%, +0.14%
CodeSize: 217132208 -> 216748832 (-0.18%); split: -0.29%, +0.12%
VGPRs: 3019304 -> 3099596 (+2.66%); split: -0.11%, +2.77%
Latency: 268857129 -> 265951122 (-1.08%); split: -1.33%, +0.25%
InvThroughput: 40960938 -> 41044533 (+0.20%); split: -0.18%, +0.39%
VClause: 794000 -> 782913 (-1.40%); split: -2.24%, +0.84%
SClause: 1192476 -> 1150831 (-3.49%); split: -3.94%, +0.45%
Copies: 2720470 -> 2700148 (-0.75%); split: -1.84%, +1.09%
Branches: 785926 -> 785951 (+0.00%); split: -0.01%, +0.01%
VALU: 22918411 -> 22890189 (-0.12%); split: -0.19%, +0.06%
SALU: 5281201 -> 5289486 (+0.16%); split: -0.21%, +0.36%
VOPD: 8790 -> 8685 (-1.19%); split: +1.08%, -2.28%
Totals from 62081 (77.77% of 79825) affected shaders: (Navi31)
MaxWaves: 1848555 -> 1812347 (-1.96%); split: +0.01%, -1.97%
Instrs: 39794460 -> 39704180 (-0.23%); split: -0.39%, +0.16%
CodeSize: 208987052 -> 208621524 (-0.17%); split: -0.31%, +0.13%
VGPRs: 3046284 -> 3135156 (+2.92%); split: -0.11%, +3.03%
Latency: 268863465 -> 265218186 (-1.36%); split: -1.59%, +0.23%
InvThroughput: 41101515 -> 41167075 (+0.16%); split: -0.22%, +0.38%
VClause: 795316 -> 774899 (-2.57%); split: -3.17%, +0.61%
SClause: 1177294 -> 1135451 (-3.55%); split: -4.06%, +0.51%
Copies: 2743254 -> 2725127 (-0.66%); split: -1.90%, +1.24%
Branches: 801395 -> 801428 (+0.00%); split: -0.01%, +0.02%
VALU: 23898938 -> 23871294 (-0.12%); split: -0.20%, +0.08%
SALU: 3908807 -> 3919130 (+0.26%); split: -0.23%, +0.50%
VOPD: 8529 -> 8500 (-0.34%); split: +1.29%, -1.63%
Totals from 44996 (71.01% of 63370) affected shaders: (Vega10)
MaxWaves: 307074 -> 304808 (-0.74%); split: +0.63%, -1.37%
Instrs: 22743534 -> 22716240 (-0.12%); split: -0.22%, +0.10%
CodeSize: 117284856 -> 117173212 (-0.10%); split: -0.19%, +0.09%
SGPRs: 3249008 -> 3330480 (+2.51%); split: -0.36%, +2.87%
VGPRs: 1901400 -> 1943880 (+2.23%); split: -0.60%, +2.83%
Latency: 224839126 -> 222878477 (-0.87%); split: -1.19%, +0.31%
InvThroughput: 114389570 -> 114316559 (-0.06%); split: -0.17%, +0.11%
VClause: 482012 -> 473304 (-1.81%); split: -2.86%, +1.05%
SClause: 757799 -> 717092 (-5.37%); split: -5.64%, +0.27%
Copies: 2182735 -> 2183598 (+0.04%); split: -1.17%, +1.21%
Branches: 396026 -> 395996 (-0.01%); split: -0.03%, +0.02%
VALU: 16740283 -> 16728098 (-0.07%); split: -0.14%, +0.07%
SALU: 2133575 -> 2145863 (+0.58%); split: -0.29%, +0.86%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30720>
2024-07-24 11:51:31 +02:00
|
|
|
|
|
|
|
|
const int wave_factor = program->gfx_level >= GFX10 ? 2 : 1;
|
|
|
|
|
const int wave_minimum = std::max<int>(program->min_waves, 4 * wave_factor);
|
|
|
|
|
const float reg_file_multiple = program->dev.physical_vgprs / (256.0 * wave_factor);
|
|
|
|
|
|
|
|
|
|
/* If we already have fewer waves than the minimum, don't reduce them further.
|
|
|
|
|
* Otherwise, sacrifice some waves and use more VGPRs, in order to improve scheduling.
|
|
|
|
|
*/
|
|
|
|
|
int vgpr_demand = std::max<int>(24, demand.vgpr) + 12 * reg_file_multiple;
|
|
|
|
|
int target_waves = std::max(wave_minimum, program->dev.physical_vgprs / vgpr_demand);
|
|
|
|
|
target_waves = max_suitable_waves(program, std::min<int>(program->num_waves, target_waves));
|
|
|
|
|
assert(target_waves >= program->min_waves);
|
|
|
|
|
|
|
|
|
|
ctx.mv.max_registers = get_addr_regs_from_waves(program, target_waves);
|
2024-07-24 13:12:28 +02:00
|
|
|
ctx.mv.max_registers.vgpr -= 2;
|
aco/scheduler: stop rounding down the target number of waves on GFX10+
This way, it can make use of uneven wave numbers.
Totals from 4078 (5.14% of 79395) affected shaders: (Navi21)
MaxWaves: 58715 -> 65460 (+11.49%); split: +11.49%, -0.01%
Instrs: 5033684 -> 5048244 (+0.29%); split: -0.09%, +0.38%
CodeSize: 26833884 -> 26898780 (+0.24%); split: -0.07%, +0.32%
VGPRs: 302360 -> 265312 (-12.25%); split: -12.26%, +0.01%
Latency: 34636448 -> 36044242 (+4.06%); split: -0.08%, +4.14%
InvThroughput: 7999403 -> 7662697 (-4.21%); split: -4.55%, +0.34%
VClause: 105403 -> 111996 (+6.26%); split: -0.40%, +6.66%
SClause: 132996 -> 133460 (+0.35%); split: -0.81%, +1.16%
Copies: 297036 -> 308122 (+3.73%); split: -0.64%, +4.37%
Branches: 89376 -> 89390 (+0.02%); split: -0.00%, +0.02%
VALU: 3477621 -> 3488510 (+0.31%); split: -0.05%, +0.36%
SALU: 484211 -> 484191 (-0.00%); split: -0.08%, +0.08%
Totals from 1840 (2.32% of 79395) affected shaders: (Navi31)
MaxWaves: 30714 -> 34182 (+11.29%)
Instrs: 3102955 -> 3131001 (+0.90%); split: -0.05%, +0.95%
CodeSize: 16160564 -> 16273100 (+0.70%); split: -0.04%, +0.74%
VGPRs: 174540 -> 150600 (-13.72%)
Latency: 23521914 -> 24515055 (+4.22%); split: -0.07%, +4.29%
InvThroughput: 4373397 -> 4202912 (-3.90%); split: -4.40%, +0.50%
VClause: 59087 -> 64091 (+8.47%); split: -0.24%, +8.71%
SClause: 74844 -> 75366 (+0.70%); split: -0.53%, +1.22%
Copies: 184396 -> 197747 (+7.24%); split: -0.25%, +7.49%
Branches: 46015 -> 46028 (+0.03%); split: -0.00%, +0.03%
VALU: 1929286 -> 1942709 (+0.70%); split: -0.02%, +0.71%
SALU: 216126 -> 215983 (-0.07%); split: -0.18%, +0.12%
VOPD: 1216 -> 1217 (+0.08%); split: +1.40%, -1.32%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33644>
2024-07-24 11:48:47 +02:00
|
|
|
|
aco: fix num_waves on GFX10+
There are half the SIMDs per CU and physical_vgprs should be 512 instead
of 256.
fossil-db (GFX10.3):
Totals from 3622 (2.60% of 139391) affected shaders:
VGPRs: 298192 -> 289732 (-2.84%); split: -3.43%, +0.59%
CodeSize: 29443432 -> 29458388 (+0.05%); split: -0.00%, +0.06%
MaxWaves: 21703 -> 23395 (+7.80%); split: +7.84%, -0.05%
Instrs: 5677920 -> 5681438 (+0.06%); split: -0.01%, +0.07%
Cycles: 280715524 -> 280895676 (+0.06%); split: -0.00%, +0.07%
VMEM: 981142 -> 981894 (+0.08%); split: +0.18%, -0.10%
SMEM: 243315 -> 243454 (+0.06%); split: +0.07%, -0.02%
VClause: 88991 -> 89767 (+0.87%); split: -0.02%, +0.89%
SClause: 200660 -> 200659 (-0.00%); split: -0.00%, +0.00%
Copies: 430729 -> 434160 (+0.80%); split: -0.07%, +0.86%
Branches: 158004 -> 158021 (+0.01%); split: -0.01%, +0.02%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8523>
2021-01-19 11:37:52 +00:00
|
|
|
/* VMEM_MAX_MOVES and such assume pre-GFX10 wave count */
|
aco/scheduler: improve scheduling heuristic
The heuristic we are currently using still stems from the GCN era;
the only adjustment made for RDNA was to double (or triple)
the wave count.
This rewrite aims to detangle some concepts and provide more consistent results.
- wave_factor: The purpose of this value is to reflect that RDNA SIMDs can
accommodate twice as many waves as GCN SIMDs.
- reg_file_multiple: This value accounts for the larger register file of wave32
and some RDNA3 families.
- wave_minimum: Below this value, we don't sacrifice any waves. It corresponds
to a register demand of 64 VGPRs in wave64.
- occupancy_factor: Depending on target_waves and wave_factor, this controls
the scheduling window sizes and number of moves.
The main differences from the previous heuristic are a lower wave minimum and
a slightly less aggressive reduction of waves.
It also increases SMEM_MAX_MOVES in order to mitigate some of the changes
from targeting fewer waves.
Totals from 62777 (78.63% of 79839) affected shaders: (Navi48)
MaxWaves: 1880983 -> 1848028 (-1.75%); split: +0.01%, -1.76%
Instrs: 40904711 -> 40800797 (-0.25%); split: -0.39%, +0.14%
CodeSize: 217132208 -> 216748832 (-0.18%); split: -0.29%, +0.12%
VGPRs: 3019304 -> 3099596 (+2.66%); split: -0.11%, +2.77%
Latency: 268857129 -> 265951122 (-1.08%); split: -1.33%, +0.25%
InvThroughput: 40960938 -> 41044533 (+0.20%); split: -0.18%, +0.39%
VClause: 794000 -> 782913 (-1.40%); split: -2.24%, +0.84%
SClause: 1192476 -> 1150831 (-3.49%); split: -3.94%, +0.45%
Copies: 2720470 -> 2700148 (-0.75%); split: -1.84%, +1.09%
Branches: 785926 -> 785951 (+0.00%); split: -0.01%, +0.01%
VALU: 22918411 -> 22890189 (-0.12%); split: -0.19%, +0.06%
SALU: 5281201 -> 5289486 (+0.16%); split: -0.21%, +0.36%
VOPD: 8790 -> 8685 (-1.19%); split: +1.08%, -2.28%
Totals from 62081 (77.77% of 79825) affected shaders: (Navi31)
MaxWaves: 1848555 -> 1812347 (-1.96%); split: +0.01%, -1.97%
Instrs: 39794460 -> 39704180 (-0.23%); split: -0.39%, +0.16%
CodeSize: 208987052 -> 208621524 (-0.17%); split: -0.31%, +0.13%
VGPRs: 3046284 -> 3135156 (+2.92%); split: -0.11%, +3.03%
Latency: 268863465 -> 265218186 (-1.36%); split: -1.59%, +0.23%
InvThroughput: 41101515 -> 41167075 (+0.16%); split: -0.22%, +0.38%
VClause: 795316 -> 774899 (-2.57%); split: -3.17%, +0.61%
SClause: 1177294 -> 1135451 (-3.55%); split: -4.06%, +0.51%
Copies: 2743254 -> 2725127 (-0.66%); split: -1.90%, +1.24%
Branches: 801395 -> 801428 (+0.00%); split: -0.01%, +0.02%
VALU: 23898938 -> 23871294 (-0.12%); split: -0.20%, +0.08%
SALU: 3908807 -> 3919130 (+0.26%); split: -0.23%, +0.50%
VOPD: 8529 -> 8500 (-0.34%); split: +1.29%, -1.63%
Totals from 44996 (71.01% of 63370) affected shaders: (Vega10)
MaxWaves: 307074 -> 304808 (-0.74%); split: +0.63%, -1.37%
Instrs: 22743534 -> 22716240 (-0.12%); split: -0.22%, +0.10%
CodeSize: 117284856 -> 117173212 (-0.10%); split: -0.19%, +0.09%
SGPRs: 3249008 -> 3330480 (+2.51%); split: -0.36%, +2.87%
VGPRs: 1901400 -> 1943880 (+2.23%); split: -0.60%, +2.83%
Latency: 224839126 -> 222878477 (-0.87%); split: -1.19%, +0.31%
InvThroughput: 114389570 -> 114316559 (-0.06%); split: -0.17%, +0.11%
VClause: 482012 -> 473304 (-1.81%); split: -2.86%, +1.05%
SClause: 757799 -> 717092 (-5.37%); split: -5.64%, +0.27%
Copies: 2182735 -> 2183598 (+0.04%); split: -1.17%, +1.21%
Branches: 396026 -> 395996 (-0.01%); split: -0.03%, +0.02%
VALU: 16740283 -> 16728098 (-0.07%); split: -0.14%, +0.07%
SALU: 2133575 -> 2145863 (+0.58%); split: -0.29%, +0.86%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30720>
2024-07-24 11:51:31 +02:00
|
|
|
ctx.occupancy_factor = target_waves / wave_factor;
|
aco: fix num_waves on GFX10+
There are half the SIMDs per CU and physical_vgprs should be 512 instead
of 256.
fossil-db (GFX10.3):
Totals from 3622 (2.60% of 139391) affected shaders:
VGPRs: 298192 -> 289732 (-2.84%); split: -3.43%, +0.59%
CodeSize: 29443432 -> 29458388 (+0.05%); split: -0.00%, +0.06%
MaxWaves: 21703 -> 23395 (+7.80%); split: +7.84%, -0.05%
Instrs: 5677920 -> 5681438 (+0.06%); split: -0.01%, +0.07%
Cycles: 280715524 -> 280895676 (+0.06%); split: -0.00%, +0.07%
VMEM: 981142 -> 981894 (+0.08%); split: +0.18%, -0.10%
SMEM: 243315 -> 243454 (+0.06%); split: +0.07%, -0.02%
VClause: 88991 -> 89767 (+0.87%); split: -0.02%, +0.89%
SClause: 200660 -> 200659 (-0.00%); split: -0.00%, +0.00%
Copies: 430729 -> 434160 (+0.80%); split: -0.07%, +0.86%
Branches: 158004 -> 158021 (+0.01%); split: -0.01%, +0.02%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8523>
2021-01-19 11:37:52 +00:00
|
|
|
|
2021-07-05 15:26:18 +02:00
|
|
|
/* NGG culling shaders are very sensitive to position export scheduling.
|
|
|
|
|
* Schedule less aggressively when early primitive export is used, and
|
|
|
|
|
* keep the position export at the very bottom when late primitive export is used.
|
|
|
|
|
*/
|
2025-01-25 12:12:36 +01:00
|
|
|
if (program->info.hw_stage == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
|
|
|
|
|
ctx.schedule_pos_exports = program->info.schedule_ngg_pos_exports;
|
|
|
|
|
ctx.schedule_pos_export_div = 4;
|
2021-07-05 15:26:18 +02:00
|
|
|
}
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
for (Block& block : program->blocks)
|
2024-06-13 11:55:27 +02:00
|
|
|
schedule_block(ctx, program, &block);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
/* update max_reg_demand and num_waves */
|
|
|
|
|
RegisterDemand new_demand;
|
|
|
|
|
for (Block& block : program->blocks) {
|
|
|
|
|
new_demand.update(block.register_demand);
|
|
|
|
|
}
|
|
|
|
|
update_vgpr_sgpr_demand(program, new_demand);
|
|
|
|
|
|
2024-07-04 16:08:46 +02:00
|
|
|
/* Validate live variable information */
|
|
|
|
|
if (!validate_live_vars(program))
|
|
|
|
|
abort();
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace aco
|