radv: store the max PS params count heuristic in radv_compiler_info

This improves compatibility between e.g. NAVI33 and PHOENIX because NGG culling is disabled by default on GFX11+: the stored limit is then 0 on both, and has_dedicated_vram no longer has to be part of radv_compiler_info. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41137>
2026-05-07 11:28:05 +02:00 · 2026-04-23 11:23:46 +02:00 · 2026-04-23 11:23:46 +02:00 · 2d78546d59
commit 2d78546d59
parent 48db5c0378
3 changed files with 12 additions and 14 deletions
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@ -1111,8 +1111,8 @@ radv_device_init_compiler_info(struct radv_device *device)
 {
   const struct radv_physical_device *pdev = radv_device_physical(device);
   struct radv_instance *instance = radv_physical_device_instance(pdev);
-
   VkShaderStageFlags dump_shaders = 0;
+   uint32_t nggc_max_ps_params = 0;

   if (instance->debug_flags & RADV_DEBUG_DUMP_VS)
      dump_shaders |= VK_SHADER_STAGE_VERTEX_BIT;
@ -1131,6 +1131,13 @@ radv_device_init_compiler_info(struct radv_device *device)
   if (instance->debug_flags & RADV_DEBUG_DUMP_CS)
      dump_shaders |= VK_SHADER_STAGE_COMPUTE_BIT | RADV_RT_STAGE_BITS;

+   if (pdev->cache_key.use_ngg_culling) {
+      /* Shader based culling efficiency can depend on PS throughput.
+       * Estimate an upper limit for PS input param count based on GPU info.
+       */
+      nggc_max_ps_params = pdev->info.has_dedicated_vram ? 12 : 8;
+   }
+
   struct radv_compiler_info info = {
      /* Hardware info */
      .ac = &pdev->info.compiler_info,
@ -1140,7 +1147,6 @@ radv_device_init_compiler_info(struct radv_device *device)
            .address32_hi = pdev->info.address32_hi,
            .rbplus_allowed = pdev->info.rbplus_allowed,
            .mesh_fast_launch_2 = pdev->info.mesh_fast_launch_2,
-            .has_dedicated_vram = pdev->info.has_dedicated_vram,
            .has_cs_regalloc_hang_bug = pdev->info.has_cs_regalloc_hang_bug,
            .lds_size_per_workgroup = pdev->info.lds_size_per_workgroup,
         },
@ -1204,6 +1210,7 @@ radv_device_init_compiler_info(struct radv_device *device)
      .robust_buffer_access =
         (device->vk.enabled_features.robustBufferAccess2 || device->vk.enabled_features.robustBufferAccess),
      .force_aniso = device->force_aniso,
+      .nggc_max_ps_params = nggc_max_ps_params,
      /* Wave/subgroup sizes */
      .subgroup_size = device->vk.physical->properties.subgroupSize,
      .min_subgroup_size = device->vk.physical->properties.minSubgroupSize,
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@ -893,18 +893,9 @@ radv_consider_culling(const struct radv_compiler_info *compiler_info, struct nir
   if (!compiler_info->cache_key->use_ngg_culling)
      return false;

-   /* Shader based culling efficiency can depend on PS throughput.
-    * Estimate an upper limit for PS input param count based on GPU info.
-    */
-   unsigned max_ps_params = 8;
-
-   if (compiler_info->ac->gfx_level >= GFX10_3 && compiler_info->hw.has_dedicated_vram)
-      max_ps_params = 12; /* GFX10.3 and newer discrete GPUs. */
-   else if (compiler_info->ac->gfx_level == GFX10 && compiler_info->hw.has_dedicated_vram)
-      max_ps_params = 12;
-
   /* TODO: consider other heuristics here, such as PS execution time */
-   if (util_bitcount64(ps_inputs_read) > max_ps_params)
+   assert(compiler_info->nggc_max_ps_params);
+   if (util_bitcount64(ps_inputs_read) > compiler_info->nggc_max_ps_params)
      return false;

   /* Only triangle culling is supported. */
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@ -519,7 +519,6 @@ struct radv_compiler_info {
      uint32_t address32_hi;
      bool rbplus_allowed;
      bool mesh_fast_launch_2;
-      bool has_dedicated_vram;
      bool has_cs_regalloc_hang_bug;
      uint32_t lds_size_per_workgroup;
   } hw;
@ -582,6 +581,7 @@ struct radv_compiler_info {
   bool force_vrs_enabled;
   bool robust_buffer_access; /* Only used by LLVM. */
   int force_aniso;
+   uint8_t nggc_max_ps_params;

   /* Wave/subgroup sizes */
   uint32_t subgroup_size;