mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 04:20:08 +01:00
aco: limit register usage for large work groups
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
This commit is contained in:
parent
eccac46cdc
commit
b5c9688516
4 changed files with 33 additions and 7 deletions
|
|
@ -819,9 +819,14 @@ setup_isel_context(Program* program,
|
|||
program->sgpr_alloc_granule = 7;
|
||||
program->sgpr_limit = 104;
|
||||
}
|
||||
|
||||
/* TODO: we don't have to allocate VCC if we don't need it */
|
||||
program->needs_vcc = true;
|
||||
|
||||
calc_min_waves(program);
|
||||
program->vgpr_limit = get_addr_vgpr_from_waves(program, program->min_waves);
|
||||
program->sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);
|
||||
|
||||
isel_context ctx = {};
|
||||
ctx.program = program;
|
||||
ctx.args = args;
|
||||
|
|
|
|||
|
|
@ -1155,6 +1155,7 @@ public:
|
|||
Temp private_segment_buffer;
|
||||
Temp scratch_offset;
|
||||
|
||||
uint16_t min_waves = 0;
|
||||
uint16_t lds_alloc_granule;
|
||||
uint32_t lds_limit; /* in bytes */
|
||||
uint16_t vgpr_limit;
|
||||
|
|
@ -1216,6 +1217,7 @@ void select_program(Program *program,
|
|||
void lower_wqm(Program* program, live& live_vars,
|
||||
const struct radv_nir_compiler_options *options);
|
||||
void lower_bool_phis(Program* program);
|
||||
void calc_min_waves(Program* program);
|
||||
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
|
||||
live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
|
||||
std::vector<uint16_t> dead_code_analysis(Program *program);
|
||||
|
|
|
|||
|
|
@ -228,6 +228,16 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block,
|
|||
|
||||
assert(block->index != 0 || new_demand == RegisterDemand());
|
||||
}
|
||||
|
||||
unsigned calc_waves_per_workgroup(Program *program)
|
||||
{
|
||||
unsigned workgroup_size = program->wave_size;
|
||||
if (program->stage == compute_cs) {
|
||||
unsigned* bsize = program->info->cs.block_size;
|
||||
workgroup_size = bsize[0] * bsize[1] * bsize[2];
|
||||
}
|
||||
return align(workgroup_size, program->wave_size) / program->wave_size;
|
||||
}
|
||||
} /* end namespace */
|
||||
|
||||
uint16_t get_extra_sgprs(Program *program)
|
||||
|
|
@ -284,6 +294,20 @@ uint16_t get_addr_vgpr_from_waves(Program *program, uint16_t max_waves)
|
|||
return std::min(vgprs, program->vgpr_limit);
|
||||
}
|
||||
|
||||
void calc_min_waves(Program* program)
|
||||
{
|
||||
unsigned waves_per_workgroup = calc_waves_per_workgroup(program);
|
||||
/* currently min_waves is in wave64 waves */
|
||||
if (program->wave_size == 32)
|
||||
waves_per_workgroup = DIV_ROUND_UP(waves_per_workgroup, 2);
|
||||
|
||||
unsigned simd_per_cu = 4; /* TODO: different on Navi */
|
||||
bool wgp = program->chip_class >= GFX10; /* assume WGP is used on Navi */
|
||||
unsigned simd_per_cu_wgp = wgp ? simd_per_cu * 2 : simd_per_cu;
|
||||
|
||||
program->min_waves = DIV_ROUND_UP(waves_per_workgroup, simd_per_cu_wgp);
|
||||
}
|
||||
|
||||
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand)
|
||||
{
|
||||
/* TODO: max_waves_per_simd, simd_per_cu and the number of physical vgprs for Navi */
|
||||
|
|
@ -304,13 +328,7 @@ void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand)
|
|||
program->max_waves = max_waves_per_simd;
|
||||
|
||||
/* adjust max_waves for workgroup and LDS limits */
|
||||
unsigned workgroup_size = program->wave_size;
|
||||
if (program->stage == compute_cs) {
|
||||
unsigned* bsize = program->info->cs.block_size;
|
||||
workgroup_size = bsize[0] * bsize[1] * bsize[2];
|
||||
}
|
||||
unsigned waves_per_workgroup = align(workgroup_size, program->wave_size) / program->wave_size;
|
||||
|
||||
unsigned waves_per_workgroup = calc_waves_per_workgroup(program);
|
||||
unsigned workgroups_per_cu_wgp = max_waves_per_simd * simd_per_cu_wgp / waves_per_workgroup;
|
||||
if (program->config->lds_size) {
|
||||
unsigned lds = program->config->lds_size * program->lds_alloc_granule;
|
||||
|
|
|
|||
|
|
@ -932,6 +932,7 @@ void schedule_program(Program *program, live& live_vars)
|
|||
ctx.num_waves = 7;
|
||||
else
|
||||
ctx.num_waves = 8;
|
||||
ctx.num_waves = std::max<uint16_t>(ctx.num_waves, program->min_waves);
|
||||
|
||||
assert(ctx.num_waves > 0 && ctx.num_waves <= program->num_waves);
|
||||
ctx.max_registers = { int16_t(get_addr_vgpr_from_waves(program, ctx.num_waves) - 2),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue