From e40457b1365767d15ffa3a18f5d4e12623cff87e Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 24 Apr 2026 13:27:43 +0100 Subject: [PATCH] ac: move lds_size_per_workgroup to ac_compiler_info Signed-off-by: Rhys Perry Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/common/ac_gpu_info.c | 20 +++++++++---------- src/amd/common/ac_gpu_info.h | 3 ++- src/amd/common/ac_rgp.c | 2 +- .../vulkan/nir/radv_nir_lower_ray_queries.c | 2 +- src/amd/vulkan/radv_device.c | 1 - src/amd/vulkan/radv_physical_device.c | 2 +- src/amd/vulkan/radv_shader.c | 5 +++-- src/amd/vulkan/radv_shader.h | 1 - src/gallium/drivers/radeonsi/si_shader.c | 4 ++-- .../winsys/radeon/drm/radeon_drm_winsys.c | 1 - 10 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index e4f86cd2d6c..9378c77f424 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -236,7 +236,7 @@ static bool handle_env_var_force_family(struct radeon_info *info) void ac_fill_compiler_info(struct radeon_info *info, const struct drm_amdgpu_info_device *device_info) { - STATIC_ASSERT(sizeof(struct ac_compiler_info) == 52); + STATIC_ASSERT(sizeof(struct ac_compiler_info) == 56); struct ac_compiler_info *out = &info->compiler_info; @@ -303,6 +303,14 @@ ac_fill_compiler_info(struct radeon_info *info, const struct drm_amdgpu_info_dev out->num_simd_per_compute_unit = info->gfx_level >= GFX10 ? 2 : 4; + /* LDS is 64KB per CU (4 SIMDs on GFX6-9, which is 16KB per SIMD). + * + * GFX10+: LDS is 128KB in WGP mode, but a workgroup can only use up to 64KB. + * GFX7+: Workgroups can use up to 64KB. + * GFX6: There is 64KB LDS per CU, but a workgroup can only use up to 32KB. + */ + out->lds_size_per_workgroup = info->gfx_level >= GFX7 ? 64 * 1024 : 32 * 1024; + out->hs_offchip_workgroup_dw_size = info->hs_offchip_workgroup_dw_size; /* Flags */ @@ -1146,14 +1154,6 @@ void ac_fill_hw_info(struct radeon_info *info, const struct drm_amdgpu_info_devi info->sqc_scalar_cache_size = device_info->sqc_data_cache_size * 1024; info->num_sqc_per_wgp = device_info->num_sqc_per_wgp; - /* LDS is 64KB per CU (4 SIMDs on GFX6-9, which is 16KB per SIMD). - * - * GFX10+: LDS is 128KB in WGP mode, but a workgroup can only use up to 64KB. - * GFX7+: Workgroups can use up to 64KB. - * GFX6: There is 64KB LDS per CU, but a workgroup can only use up to 32KB. - */ - info->lds_size_per_workgroup = info->gfx_level >= GFX7 ? 64 * 1024 : 32 * 1024; - /* Get the number of good compute units. */ info->num_cu = 0; for (int i = 0; i < info->max_se; i++) { @@ -1913,7 +1913,6 @@ void ac_print_gpu_info(FILE *f, const struct radeon_info *info, int fd) fprintf(f, " tcc_rb_non_coherent = %u\n", info->tcc_rb_non_coherent); fprintf(f, " cp_sdma_ge_use_system_memory_scope = %u\n", info->cp_sdma_ge_use_system_memory_scope); fprintf(f, " pc_lines = %u\n", info->pc_lines); - fprintf(f, " lds_size_per_workgroup = %u\n", info->lds_size_per_workgroup); fprintf(f, " lds_alloc_granularity = %i\n", ac_shader_get_lds_alloc_granularity(info->gfx_level)); fprintf(f, " max_memory_clock = %i MHz\n", info->memory_freq_mhz); @@ -2053,6 +2052,7 @@ void ac_print_gpu_info(FILE *f, const struct radeon_info *info, int fd) fprintf(f, " max_vgpr_alloc = %i\n", info->compiler_info.max_vgpr_alloc); fprintf(f, " wave64_vgpr_alloc_granularity = %i\n", info->compiler_info.wave64_vgpr_alloc_granularity); + fprintf(f, " lds_size_per_workgroup = %u\n", info->compiler_info.lds_size_per_workgroup); fprintf(f, " has_lds_bank_count_16 = %i\n", info->compiler_info.has_lds_bank_count_16); fprintf(f, " has_sram_ecc_enabled = %i\n", info->compiler_info.has_sram_ecc_enabled); fprintf(f, " has_point_sample_accel = %i\n", info->compiler_info.has_point_sample_accel); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index a998006d1e7..dd5769e2b37 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -116,6 +116,8 @@ struct ac_compiler_info { uint32_t max_vgpr_alloc; uint32_t wave64_vgpr_alloc_granularity; + uint32_t lds_size_per_workgroup; + uint32_t hs_offchip_workgroup_dw_size; /* Flags */ @@ -323,7 +325,6 @@ struct radeon_info { bool cp_sdma_ge_use_system_memory_scope; bool cp_dma_use_L2; unsigned pc_lines; - uint32_t lds_size_per_workgroup; /* CP info. */ bool gfx_ib_pad_with_type2; diff --git a/src/amd/common/ac_rgp.c b/src/amd/common/ac_rgp.c index 8850fda06de..95a95cb1d7f 100644 --- a/src/amd/common/ac_rgp.c +++ b/src/amd/common/ac_rgp.c @@ -472,7 +472,7 @@ static void ac_sqtt_fill_asic_info(const struct radeon_info *rad_info, chunk->vram_size = (uint64_t)rad_info->vram_size_kb * 1024; chunk->l2_cache_size = rad_info->l2_cache_size; chunk->l1_cache_size = rad_info->tcp_cache_size; - chunk->lds_size = rad_info->lds_size_per_workgroup; + chunk->lds_size = rad_info->compiler_info.lds_size_per_workgroup; strncpy(chunk->gpu_name, ac_get_family_name(rad_info->family), SQTT_GPU_NAME_MAX_SIZE - 1); diff --git a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c index 1c48a816207..df10025c3f7 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c @@ -169,7 +169,7 @@ init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray uint32_t shared_offset = align(shader->info.shared_size, 4); if (shader->info.stage != MESA_SHADER_COMPUTE || glsl_type_is_array(opaque_type) || - shared_offset + shared_stack_size > compiler_info->hw.lds_size_per_workgroup) { + shared_offset + shared_stack_size > compiler_info->ac->lds_size_per_workgroup) { dst->stack_entries = MAX_SCRATCH_STACK_ENTRY_COUNT; } else { if (radv_use_bvh_stack_rtn(compiler_info)) { diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 0e43317b7e7..3bca428221c 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1147,7 +1147,6 @@ radv_device_init_compiler_info(struct radv_device *device) .address32_hi = pdev->info.address32_hi, .rbplus_allowed = pdev->info.rbplus_allowed, .has_cs_regalloc_hang_bug = pdev->info.has_cs_regalloc_hang_bug, - .lds_size_per_workgroup = pdev->info.lds_size_per_workgroup, }, /* Debug/tracing */ .debug = diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index 09ba0f1d3c7..b9d8bbd9b32 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -1803,7 +1803,7 @@ radv_get_physical_device_properties(struct radv_physical_device *pdev) .maxFragmentOutputAttachments = 8, .maxFragmentDualSrcAttachments = 1, .maxFragmentCombinedOutputResources = max_descriptor_set_size, - .maxComputeSharedMemorySize = pdev->info.lds_size_per_workgroup, + .maxComputeSharedMemorySize = pdev->info.compiler_info.lds_size_per_workgroup, .maxComputeWorkGroupCount = {4294967295, 65535, 65535}, .maxComputeWorkGroupInvocations = 1024, .maxComputeWorkGroupSize = {1024, 1024, 1024}, diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index ec3544da737..87da690b16c 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -2245,7 +2245,7 @@ radv_postprocess_binary_config(const struct radv_compiler_info *compiler_info, s } } - assert(config->lds_size <= compiler_info->hw.lds_size_per_workgroup); + assert(config->lds_size <= compiler_info->ac->lds_size_per_workgroup); unsigned lds_alloc = ac_shader_encode_lds_size(config->lds_size, gfx_level, stage); switch (stage) { @@ -2857,7 +2857,8 @@ radv_get_max_waves(const struct radv_device *device, const struct ac_shader_conf simd_per_cu_wgp *= 2; if (lds_per_workgroup) { - unsigned lds_per_cu_wgp = gpu_info->lds_size_per_workgroup * (gfx_level >= GFX10 && conf->wgp_mode ? 2 : 1); + unsigned lds_per_cu_wgp = + gpu_info->compiler_info.lds_size_per_workgroup * (gfx_level >= GFX10 && conf->wgp_mode ? 2 : 1); unsigned max_cu_wgp_waves = lds_per_cu_wgp / lds_per_workgroup * waves_per_workgroup; max_simd_waves = MIN2(max_simd_waves, DIV_ROUND_UP(max_cu_wgp_waves, simd_per_cu_wgp)); } diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 1d6b800faad..1ec01650d6a 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -519,7 +519,6 @@ struct radv_compiler_info { uint32_t address32_hi; bool rbplus_allowed; bool has_cs_regalloc_hang_bug; - uint32_t lds_size_per_workgroup; } hw; /* Debug/tracing */ diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c2449693c35..bc80c982cd8 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -218,7 +218,7 @@ unsigned si_calculate_needed_lds_size(enum amd_gfx_level gfx_level, struct si_sh } /* Check that the LDS size is within hw limits. */ - assert(lds_size <= shader->selector->screen->info.lds_size_per_workgroup); + assert(lds_size <= shader->selector->screen->info.compiler_info.lds_size_per_workgroup); return lds_size; } @@ -305,7 +305,7 @@ static void si_calculate_max_simd_waves(struct si_shader *shader) max_simd_waves = MIN2(max_simd_waves, max_vgprs / num_vgprs); } - unsigned max_lds_per_simd = sscreen->info.lds_size_per_workgroup / sscreen->info.compiler_info.num_simd_per_compute_unit; + unsigned max_lds_per_simd = sscreen->info.compiler_info.lds_size_per_workgroup / sscreen->info.compiler_info.num_simd_per_compute_unit; if (lds_per_wave) max_simd_waves = MIN2(max_simd_waves, max_lds_per_simd / lds_per_wave); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index ff15c97dd70..93679201d22 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -639,7 +639,6 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) ws->info.max_gflops = 128 * ws->info.num_cu * ws->info.max_gpu_freq_mhz / 1000; ws->info.num_tcc_blocks = ws->info.max_tcc_blocks; ws->info.tcp_cache_size = 16 * 1024; - ws->info.lds_size_per_workgroup = ws->info.gfx_level == GFX7 ? 64 * 1024 : 32 * 1024; #ifdef HAVE_GALLIUM_RADEONSI ac_fill_compiler_info(&ws->info, NULL);