mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 22:38:05 +02:00
aco: remove 'max_waves' and use 'num_waves' to adjust for LDS and workgroup size
Totals from 21 (0.02% of 134913) affected shaders: (GFX10.3) VGPRs: 1024 -> 1176 (+14.84%) CodeSize: 127824 -> 127664 (-0.13%); split: -0.17%, +0.04% MaxWaves: 416 -> 378 (-9.13%) Instrs: 22521 -> 22502 (-0.08%); split: -0.17%, +0.09% Latency: 146386 -> 143154 (-2.21%); split: -2.21%, +0.00% InvThroughput: 28379 -> 28944 (+1.99%); split: -0.23%, +2.22% VClause: 575 -> 579 (+0.70%); split: -0.87%, +1.57% SClause: 692 -> 645 (-6.79%) Copies: 780 -> 747 (-4.23%); split: -4.74%, +0.51% Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16039>
This commit is contained in:
parent
6e6ba85fd9
commit
6220046ad1
2 changed files with 8 additions and 11 deletions
|
|
@ -2052,8 +2052,6 @@ public:
|
|||
std::vector<Block> blocks;
|
||||
std::vector<RegClass> temp_rc = {s1};
|
||||
RegisterDemand max_reg_demand = RegisterDemand();
|
||||
uint16_t num_waves = 0;
|
||||
uint16_t max_waves = 0; /* maximum number of waves, regardless of register usage */
|
||||
ac_shader_config* config;
|
||||
const struct radv_shader_info* info;
|
||||
enum chip_class chip_class;
|
||||
|
|
@ -2069,6 +2067,7 @@ public:
|
|||
Temp private_segment_buffer;
|
||||
Temp scratch_offset;
|
||||
|
||||
uint16_t num_waves = 0;
|
||||
uint16_t min_waves = 0;
|
||||
unsigned workgroup_size; /* if known; otherwise UINT_MAX */
|
||||
bool wgp_mode;
|
||||
|
|
|
|||
|
|
@ -368,7 +368,6 @@ calc_min_waves(Program* program)
|
|||
void
|
||||
update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand)
|
||||
{
|
||||
unsigned max_waves_per_simd = program->dev.max_wave64_per_simd * (64 / program->wave_size);
|
||||
unsigned simd_per_cu_wgp = program->dev.simd_per_cu * (program->wgp_mode ? 2 : 1);
|
||||
unsigned lds_limit = program->wgp_mode ? program->dev.lds_limit * 2 : program->dev.lds_limit;
|
||||
unsigned max_workgroups_per_cu_wgp = program->wgp_mode ? 32 : 16;
|
||||
|
|
@ -387,11 +386,12 @@ update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand)
|
|||
get_vgpr_alloc(program, new_demand.vgpr) + program->config->num_shared_vgprs / 2;
|
||||
program->num_waves =
|
||||
std::min<uint16_t>(program->num_waves, program->dev.physical_vgprs / vgpr_demand);
|
||||
program->max_waves = max_waves_per_simd;
|
||||
uint16_t max_waves = program->dev.max_wave64_per_simd * (64 / program->wave_size);
|
||||
program->num_waves = std::min(program->num_waves, max_waves);
|
||||
|
||||
/* adjust max_waves for workgroup and LDS limits */
|
||||
/* adjust num_waves for workgroup and LDS limits */
|
||||
unsigned waves_per_workgroup = calc_waves_per_workgroup(program);
|
||||
unsigned workgroups_per_cu_wgp = max_waves_per_simd * simd_per_cu_wgp / waves_per_workgroup;
|
||||
unsigned workgroups_per_cu_wgp = program->num_waves * simd_per_cu_wgp / waves_per_workgroup;
|
||||
|
||||
unsigned lds_per_workgroup =
|
||||
align(program->config->lds_size * program->dev.lds_encoding_granule,
|
||||
|
|
@ -416,12 +416,10 @@ update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand)
|
|||
/* in cases like waves_per_workgroup=3 or lds=65536 and
|
||||
* waves_per_workgroup=1, we want the maximum possible number of waves per
|
||||
* SIMD and not the minimum. so DIV_ROUND_UP is used */
|
||||
program->max_waves = std::min<uint16_t>(
|
||||
program->max_waves,
|
||||
DIV_ROUND_UP(workgroups_per_cu_wgp * waves_per_workgroup, simd_per_cu_wgp));
|
||||
program->num_waves =
|
||||
DIV_ROUND_UP(workgroups_per_cu_wgp * waves_per_workgroup, simd_per_cu_wgp);
|
||||
|
||||
/* incorporate max_waves and calculate max_reg_demand */
|
||||
program->num_waves = std::min<uint16_t>(program->num_waves, program->max_waves);
|
||||
/* calculate max_reg_demand */
|
||||
program->max_reg_demand.vgpr = get_addr_vgpr_from_waves(program, program->num_waves);
|
||||
program->max_reg_demand.sgpr = get_addr_sgpr_from_waves(program, program->num_waves);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue