From 2d78546d59c18b81a3c06df4a0e10130af4c79db Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 23 Apr 2026 11:23:46 +0200 Subject: [PATCH] radv: store the number of PS params heuristic to radv_compiler_info This improves compatibility between e.g. NAVI33 and PHOENIX because NGG culling is disabled by default on GFX11+. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_device.c | 11 +++++++++-- src/amd/vulkan/radv_shader.c | 13 ++----------- src/amd/vulkan/radv_shader.h | 2 +- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index e1fe3bd537a..e4a553a8b4c 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1111,8 +1111,8 @@ radv_device_init_compiler_info(struct radv_device *device) { const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_instance *instance = radv_physical_device_instance(pdev); - VkShaderStageFlags dump_shaders = 0; + uint32_t nggc_max_ps_params = 0; if (instance->debug_flags & RADV_DEBUG_DUMP_VS) dump_shaders |= VK_SHADER_STAGE_VERTEX_BIT; @@ -1131,6 +1131,13 @@ radv_device_init_compiler_info(struct radv_device *device) if (instance->debug_flags & RADV_DEBUG_DUMP_CS) dump_shaders |= VK_SHADER_STAGE_COMPUTE_BIT | RADV_RT_STAGE_BITS; + if (pdev->cache_key.use_ngg_culling) { + /* Shader based culling efficiency can depend on PS throughput. + * Estimate an upper limit for PS input param count based on GPU info. + */ + nggc_max_ps_params = pdev->info.has_dedicated_vram ? 
12 : 8; + } + struct radv_compiler_info info = { /* Hardware info */ .ac = &pdev->info.compiler_info, @@ -1140,7 +1147,6 @@ radv_device_init_compiler_info(struct radv_device *device) .address32_hi = pdev->info.address32_hi, .rbplus_allowed = pdev->info.rbplus_allowed, .mesh_fast_launch_2 = pdev->info.mesh_fast_launch_2, - .has_dedicated_vram = pdev->info.has_dedicated_vram, .has_cs_regalloc_hang_bug = pdev->info.has_cs_regalloc_hang_bug, .lds_size_per_workgroup = pdev->info.lds_size_per_workgroup, }, @@ -1204,6 +1210,7 @@ radv_device_init_compiler_info(struct radv_device *device) .robust_buffer_access = (device->vk.enabled_features.robustBufferAccess2 || device->vk.enabled_features.robustBufferAccess), .force_aniso = device->force_aniso, + .nggc_max_ps_params = nggc_max_ps_params, /* Wave/subgroup sizes */ .subgroup_size = device->vk.physical->properties.subgroupSize, .min_subgroup_size = device->vk.physical->properties.minSubgroupSize, diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 64a80f0a80f..6c4b3bea714 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -893,18 +893,9 @@ radv_consider_culling(const struct radv_compiler_info *compiler_info, struct nir if (!compiler_info->cache_key->use_ngg_culling) return false; - /* Shader based culling efficiency can depend on PS throughput. - * Estimate an upper limit for PS input param count based on GPU info. - */ - unsigned max_ps_params = 8; - - if (compiler_info->ac->gfx_level >= GFX10_3 && compiler_info->hw.has_dedicated_vram) - max_ps_params = 12; /* GFX10.3 and newer discrete GPUs. 
*/ - else if (compiler_info->ac->gfx_level == GFX10 && compiler_info->hw.has_dedicated_vram) - max_ps_params = 12; - /* TODO: consider other heuristics here, such as PS execution time */ - if (util_bitcount64(ps_inputs_read) > max_ps_params) + assert(compiler_info->nggc_max_ps_params); + if (util_bitcount64(ps_inputs_read) > compiler_info->nggc_max_ps_params) return false; /* Only triangle culling is supported. */ diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 50993143643..42801931304 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -519,7 +519,6 @@ struct radv_compiler_info { uint32_t address32_hi; bool rbplus_allowed; bool mesh_fast_launch_2; - bool has_dedicated_vram; bool has_cs_regalloc_hang_bug; uint32_t lds_size_per_workgroup; } hw; @@ -582,6 +581,7 @@ struct radv_compiler_info { bool force_vrs_enabled; bool robust_buffer_access; /* Only used by LLVM. */ int force_aniso; + uint8_t nggc_max_ps_params; /* Wave/subgroup sizes */ uint32_t subgroup_size;