From 975e5e262bb4522a784c9b32ab19bcc64ba95185 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 2 Mar 2021 23:21:39 -0500 Subject: [PATCH] ac,radeonsi: use correct VGPR granularity on Aldebaran Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/common/ac_binary.c | 2 +- src/amd/common/ac_gpu_info.c | 12 +++++++++--- src/gallium/drivers/radeonsi/si_compute.c | 3 ++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/amd/common/ac_binary.c b/src/amd/common/ac_binary.c index 93068696c32..4fdef596132 100644 --- a/src/amd/common/ac_binary.c +++ b/src/amd/common/ac_binary.c @@ -51,7 +51,7 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wav case R_00B228_SPI_SHADER_PGM_RSRC1_GS: case R_00B848_COMPUTE_PGM_RSRC1: case R_00B428_SPI_SHADER_PGM_RSRC1_HS: - if (wave_size == 32) + if (wave_size == 32 || info->wave64_vgpr_alloc_granularity == 8) conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 8); else conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4); diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 31060857953..947c8117c12 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -963,9 +963,15 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, info->has_3d_cube_border_color_mipmap = info->has_graphics || info->family == CHIP_ARCTURUS; info->max_sgpr_alloc = info->family == CHIP_TONGA || info->family == CHIP_ICELAND ? 96 : 104; - info->min_wave64_vgpr_alloc = 4; - info->max_vgpr_alloc = 256; - info->wave64_vgpr_alloc_granularity = 4; + if (!info->has_graphics && info->family >= CHIP_ALDEBARAN) { + info->min_wave64_vgpr_alloc = 8; + info->max_vgpr_alloc = 512; + info->wave64_vgpr_alloc_granularity = 8; + } else { + info->min_wave64_vgpr_alloc = 4; + info->max_vgpr_alloc = 256; + info->wave64_vgpr_alloc_granularity = 4; + } info->num_physical_wave64_vgprs_per_simd = info->chip_class >= GFX10 ? 512 : 256; info->num_simd_per_compute_unit = info->chip_class >= GFX10 ? 2 : 4; diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index e563763f78b..00e6a98ff76 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -190,7 +190,8 @@ static void si_create_compute_state_async(void *job, int thread_index) bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0; shader->config.rsrc1 = S_00B848_VGPRS((shader->config.num_vgprs - 1) / - (sscreen->compute_wave_size == 32 ? 8 : 4)) | + ((sscreen->compute_wave_size == 32 || + sscreen->info.wave64_vgpr_alloc_granularity == 8) ? 8 : 4)) | S_00B848_DX10_CLAMP(1) | S_00B848_MEM_ORDERED(si_shader_mem_ordered(shader)) | S_00B848_WGP_MODE(sscreen->info.chip_class >= GFX10) |