mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 13:58:04 +02:00
aco/gfx11: increase gfx1100/gfx1101 physical vgprs
https://reviews.llvm.org/D134522

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18825>
This commit is contained in:
parent
67ebe86f0c
commit
50073d6135
5 changed files with 18 additions and 10 deletions
|
|
@@ -12563,7 +12563,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, ac_shade
|
|||
program->config->float_mode = program->blocks[0].fp_mode.val;
|
||||
/* addition on GFX6-8 requires a carry-out (we use VCC) */
|
||||
program->needs_vcc = program->gfx_level <= GFX8;
|
||||
- program->config->num_vgprs = get_vgpr_alloc(program, num_vgprs);
|
||||
+ program->config->num_vgprs = std::min<uint16_t>(get_vgpr_alloc(program, num_vgprs), 256);
|
||||
program->config->num_sgprs = get_sgpr_alloc(program, num_sgprs);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -111,14 +111,20 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
|||
|
||||
if (gfx_level >= GFX10) {
|
||||
program->dev.physical_sgprs = 5120; /* doesn't matter as long as it's at least 128 * 40 */
|
||||
- program->dev.physical_vgprs = program->wave_size == 32 ? 1024 : 512;
|
||||
program->dev.sgpr_alloc_granule = 128;
|
||||
program->dev.sgpr_limit =
|
||||
108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */
|
||||
- if (gfx_level == GFX10_3)
|
||||
- program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 16 : 8;
|
||||
- else
|
||||
- program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 8 : 4;
|
||||
|
||||
+ if (family == CHIP_GFX1100 || family == CHIP_GFX1101) {
|
||||
+ program->dev.physical_vgprs = program->wave_size == 32 ? 1536 : 768;
|
||||
+ program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 24 : 12;
|
||||
+ } else {
|
||||
+ program->dev.physical_vgprs = program->wave_size == 32 ? 1024 : 512;
|
||||
+ if (gfx_level >= GFX10_3)
|
||||
+ program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 16 : 8;
|
||||
+ else
|
||||
+ program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 8 : 4;
|
||||
+ }
|
||||
} else if (program->gfx_level >= GFX8) {
|
||||
program->dev.physical_sgprs = 800;
|
||||
program->dev.sgpr_alloc_granule = 16;
|
||||
|
|
|
|||
|
|
@@ -2134,7 +2134,7 @@ struct DeviceInfo {
|
|||
uint16_t vgpr_limit;
|
||||
uint16_t sgpr_limit;
|
||||
uint16_t sgpr_alloc_granule;
|
||||
- uint16_t vgpr_alloc_granule; /* must be power of two */
|
||||
+ uint16_t vgpr_alloc_granule;
|
||||
unsigned max_wave64_per_simd;
|
||||
unsigned simd_per_cu;
|
||||
bool has_fast_fma32 = false;
|
||||
|
|
|
|||
|
|
@@ -348,7 +348,7 @@ get_vgpr_alloc(Program* program, uint16_t addressable_vgprs)
|
|||
{
|
||||
assert(addressable_vgprs <= program->dev.vgpr_limit);
|
||||
uint16_t granule = program->dev.vgpr_alloc_granule;
|
||||
- return align(std::max(addressable_vgprs, granule), granule);
|
||||
+ return ALIGN_NPOT(std::max(addressable_vgprs, granule), granule);
|
||||
}
|
||||
|
||||
unsigned
|
||||
|
|
@@ -370,7 +370,8 @@ get_addr_sgpr_from_waves(Program* program, uint16_t waves)
|
|||
uint16_t
|
||||
get_addr_vgpr_from_waves(Program* program, uint16_t waves)
|
||||
{
|
||||
- uint16_t vgprs = program->dev.physical_vgprs / waves & ~(program->dev.vgpr_alloc_granule - 1);
|
||||
+ uint16_t vgprs = program->dev.physical_vgprs / waves;
|
||||
+ vgprs = vgprs / program->dev.vgpr_alloc_granule * program->dev.vgpr_alloc_granule;
|
||||
vgprs -= program->config->num_shared_vgprs / 2;
|
||||
return std::min(vgprs, program->dev.vgpr_limit);
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -3149,7 +3149,8 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
|
|||
} /* end for BB */
|
||||
|
||||
/* num_gpr = rnd_up(max_used_gpr + 1) */
|
||||
- program->config->num_vgprs = get_vgpr_alloc(program, ctx.max_used_vgpr + 1);
|
||||
+ program->config->num_vgprs =
|
||||
+ std::min<uint16_t>(get_vgpr_alloc(program, ctx.max_used_vgpr + 1), 256);
|
||||
program->config->num_sgprs = get_sgpr_alloc(program, ctx.max_used_sgpr + 1);
|
||||
|
||||
program->progress = CompilationProgress::after_ra;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue