From aa53335135572518c5308c70b11c8cc5c87ecdb9 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Thu, 28 Jan 2021 11:59:21 +0000
Subject: [PATCH] ac: split lds_granularity into encode and allocation granularities
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/common/ac_gpu_info.c | 9 +++++++--
 src/amd/common/ac_gpu_info.h | 3 ++-
 src/amd/common/ac_rgp.c      | 2 +-
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 50f2a849e68..4ed9f5ab4a9 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -722,7 +722,11 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
     * LDS is 128KB in WGP mode and 64KB in CU mode. Assume the WGP mode is used.
     */
    info->lds_size_per_workgroup = info->chip_class >= GFX10 ? 128 * 1024 : 64 * 1024;
-   info->lds_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
+   /* lds_encode_granularity is the block size used for encoding registers.
+    * lds_alloc_granularity is what the hardware will align the LDS size to.
+    */
+   info->lds_encode_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
+   info->lds_alloc_granularity = info->chip_class >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
 
    assert(util_is_power_of_two_or_zero(dma.available_rings + 1));
    assert(util_is_power_of_two_or_zero(compute.available_rings + 1));
@@ -1058,7 +1062,8 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
    fprintf(f, " tcc_harvested = %u\n", info->tcc_harvested);
    fprintf(f, " pc_lines = %u\n", info->pc_lines);
    fprintf(f, " lds_size_per_workgroup = %u\n", info->lds_size_per_workgroup);
-   fprintf(f, " lds_granularity = %i\n", info->lds_granularity);
+   fprintf(f, " lds_alloc_granularity = %i\n", info->lds_alloc_granularity);
+   fprintf(f, " lds_encode_granularity = %i\n", info->lds_encode_granularity);
    fprintf(f, " max_memory_clock = %i\n", info->max_memory_clock);
    fprintf(f, " ce_ram_size = %i\n", info->ce_ram_size);
    fprintf(f, " l1_cache_size = %i\n", info->l1_cache_size);
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index 3f9cde55732..57262c3a993 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -110,7 +110,8 @@ struct radeon_info {
    bool tcc_harvested;
    unsigned pc_lines;
    uint32_t lds_size_per_workgroup;
-   uint32_t lds_granularity;
+   uint32_t lds_alloc_granularity;
+   uint32_t lds_encode_granularity;
    uint32_t max_memory_clock;
    uint32_t ce_ram_size;
    uint32_t l1_cache_size;
diff --git a/src/amd/common/ac_rgp.c b/src/amd/common/ac_rgp.c
index ee4bc25eb6a..4c61ddb1b43 100644
--- a/src/amd/common/ac_rgp.c
+++ b/src/amd/common/ac_rgp.c
@@ -405,7 +405,7 @@ static void ac_fill_sqtt_asic_info(struct radeon_info *rad_info,
    chunk->max_memory_clock = rad_info->max_memory_clock * 1000000;
    chunk->memory_ops_per_clock = 0;
    chunk->memory_chip_type = ac_vram_type_to_sqtt_memory_type(rad_info->vram_type);
-   chunk->lds_granularity = rad_info->lds_granularity;
+   chunk->lds_granularity = rad_info->lds_encode_granularity;
 
    for (unsigned se = 0; se < 4; se++) {
       for (unsigned sa = 0; sa < 2; sa++) {