ac,radeonsi: use correct VGPR granularity on Aldebaran

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9389>
This commit is contained in:
Marek Olšák 2021-03-02 23:21:39 -05:00 committed by Marge Bot
parent a9da3fc0d1
commit 975e5e262b
3 changed files with 12 additions and 5 deletions

View file

@@ -51,7 +51,7 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wav
 case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
 case R_00B848_COMPUTE_PGM_RSRC1:
 case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
-if (wave_size == 32)
+if (wave_size == 32 || info->wave64_vgpr_alloc_granularity == 8)
 conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 8);
 else
 conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);

View file

@@ -963,9 +963,15 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
 info->has_3d_cube_border_color_mipmap = info->has_graphics || info->family == CHIP_ARCTURUS;
 info->max_sgpr_alloc = info->family == CHIP_TONGA || info->family == CHIP_ICELAND ? 96 : 104;
-info->min_wave64_vgpr_alloc = 4;
-info->max_vgpr_alloc = 256;
-info->wave64_vgpr_alloc_granularity = 4;
+if (!info->has_graphics && info->family >= CHIP_ALDEBARAN) {
+info->min_wave64_vgpr_alloc = 8;
+info->max_vgpr_alloc = 512;
+info->wave64_vgpr_alloc_granularity = 8;
+} else {
+info->min_wave64_vgpr_alloc = 4;
+info->max_vgpr_alloc = 256;
+info->wave64_vgpr_alloc_granularity = 4;
+}
 info->num_physical_wave64_vgprs_per_simd = info->chip_class >= GFX10 ? 512 : 256;
 info->num_simd_per_compute_unit = info->chip_class >= GFX10 ? 2 : 4;

View file

@@ -190,7 +190,8 @@ static void si_create_compute_state_async(void *job, int thread_index)
 bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0;
 shader->config.rsrc1 = S_00B848_VGPRS((shader->config.num_vgprs - 1) /
-(sscreen->compute_wave_size == 32 ? 8 : 4)) |
+((sscreen->compute_wave_size == 32 ||
+sscreen->info.wave64_vgpr_alloc_granularity == 8) ? 8 : 4)) |
 S_00B848_DX10_CLAMP(1) |
 S_00B848_MEM_ORDERED(si_shader_mem_ordered(shader)) |
 S_00B848_WGP_MODE(sscreen->info.chip_class >= GFX10) |