radv: store the PS param count heuristic in radv_compiler_info
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

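This improves compatibility between e.g. NAVI33 and PHOENIX, because
NGG culling is disabled by default on GFX11+. When culling is disabled,
the stored limit stays 0 regardless of has_dedicated_vram, which is no
longer part of radv_compiler_info.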

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41137>
This commit is contained in:
Samuel Pitoiset 2026-04-23 11:23:46 +02:00 committed by Marge Bot
parent 48db5c0378
commit 2d78546d59
3 changed files with 12 additions and 14 deletions

View file

@ -1111,8 +1111,8 @@ radv_device_init_compiler_info(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_instance *instance = radv_physical_device_instance(pdev);
VkShaderStageFlags dump_shaders = 0;
uint32_t nggc_max_ps_params = 0;
if (instance->debug_flags & RADV_DEBUG_DUMP_VS)
dump_shaders |= VK_SHADER_STAGE_VERTEX_BIT;
@ -1131,6 +1131,13 @@ radv_device_init_compiler_info(struct radv_device *device)
if (instance->debug_flags & RADV_DEBUG_DUMP_CS)
dump_shaders |= VK_SHADER_STAGE_COMPUTE_BIT | RADV_RT_STAGE_BITS;
if (pdev->cache_key.use_ngg_culling) {
/* Shader based culling efficiency can depend on PS throughput.
* Estimate an upper limit for PS input param count based on GPU info.
*/
nggc_max_ps_params = pdev->info.has_dedicated_vram ? 12 : 8;
}
struct radv_compiler_info info = {
/* Hardware info */
.ac = &pdev->info.compiler_info,
@ -1140,7 +1147,6 @@ radv_device_init_compiler_info(struct radv_device *device)
.address32_hi = pdev->info.address32_hi,
.rbplus_allowed = pdev->info.rbplus_allowed,
.mesh_fast_launch_2 = pdev->info.mesh_fast_launch_2,
.has_dedicated_vram = pdev->info.has_dedicated_vram,
.has_cs_regalloc_hang_bug = pdev->info.has_cs_regalloc_hang_bug,
.lds_size_per_workgroup = pdev->info.lds_size_per_workgroup,
},
@ -1204,6 +1210,7 @@ radv_device_init_compiler_info(struct radv_device *device)
.robust_buffer_access =
(device->vk.enabled_features.robustBufferAccess2 || device->vk.enabled_features.robustBufferAccess),
.force_aniso = device->force_aniso,
.nggc_max_ps_params = nggc_max_ps_params,
/* Wave/subgroup sizes */
.subgroup_size = device->vk.physical->properties.subgroupSize,
.min_subgroup_size = device->vk.physical->properties.minSubgroupSize,

View file

@ -893,18 +893,9 @@ radv_consider_culling(const struct radv_compiler_info *compiler_info, struct nir
if (!compiler_info->cache_key->use_ngg_culling)
return false;
/* Shader based culling efficiency can depend on PS throughput.
* Estimate an upper limit for PS input param count based on GPU info.
*/
unsigned max_ps_params = 8;
if (compiler_info->ac->gfx_level >= GFX10_3 && compiler_info->hw.has_dedicated_vram)
max_ps_params = 12; /* GFX10.3 and newer discrete GPUs. */
else if (compiler_info->ac->gfx_level == GFX10 && compiler_info->hw.has_dedicated_vram)
max_ps_params = 12;
/* TODO: consider other heuristics here, such as PS execution time */
if (util_bitcount64(ps_inputs_read) > max_ps_params)
assert(compiler_info->nggc_max_ps_params);
if (util_bitcount64(ps_inputs_read) > compiler_info->nggc_max_ps_params)
return false;
/* Only triangle culling is supported. */

View file

@ -519,7 +519,6 @@ struct radv_compiler_info {
uint32_t address32_hi;
bool rbplus_allowed;
bool mesh_fast_launch_2;
bool has_dedicated_vram;
bool has_cs_regalloc_hang_bug;
uint32_t lds_size_per_workgroup;
} hw;
@ -582,6 +581,7 @@ struct radv_compiler_info {
bool force_vrs_enabled;
bool robust_buffer_access; /* Only used by LLVM. */
int force_aniso;
uint8_t nggc_max_ps_params;
/* Wave/subgroup sizes */
uint32_t subgroup_size;