mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 11:28:05 +02:00
radv: store the PS param count heuristic in radv_compiler_info
This improves compatibility between e.g. NAVI33 and PHOENIX, because NGG culling is disabled by default on GFX11+.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41137>
This commit is contained in:
parent
48db5c0378
commit
2d78546d59
3 changed files with 12 additions and 14 deletions
|
|
@ -1111,8 +1111,8 @@ radv_device_init_compiler_info(struct radv_device *device)
|
|||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
|
||||
VkShaderStageFlags dump_shaders = 0;
|
||||
uint32_t nggc_max_ps_params = 0;
|
||||
|
||||
if (instance->debug_flags & RADV_DEBUG_DUMP_VS)
|
||||
dump_shaders |= VK_SHADER_STAGE_VERTEX_BIT;
|
||||
|
|
@ -1131,6 +1131,13 @@ radv_device_init_compiler_info(struct radv_device *device)
|
|||
if (instance->debug_flags & RADV_DEBUG_DUMP_CS)
|
||||
dump_shaders |= VK_SHADER_STAGE_COMPUTE_BIT | RADV_RT_STAGE_BITS;
|
||||
|
||||
if (pdev->cache_key.use_ngg_culling) {
|
||||
/* Shader based culling efficiency can depend on PS throughput.
|
||||
* Estimate an upper limit for PS input param count based on GPU info.
|
||||
*/
|
||||
nggc_max_ps_params = pdev->info.has_dedicated_vram ? 12 : 8;
|
||||
}
|
||||
|
||||
struct radv_compiler_info info = {
|
||||
/* Hardware info */
|
||||
.ac = &pdev->info.compiler_info,
|
||||
|
|
@ -1140,7 +1147,6 @@ radv_device_init_compiler_info(struct radv_device *device)
|
|||
.address32_hi = pdev->info.address32_hi,
|
||||
.rbplus_allowed = pdev->info.rbplus_allowed,
|
||||
.mesh_fast_launch_2 = pdev->info.mesh_fast_launch_2,
|
||||
.has_dedicated_vram = pdev->info.has_dedicated_vram,
|
||||
.has_cs_regalloc_hang_bug = pdev->info.has_cs_regalloc_hang_bug,
|
||||
.lds_size_per_workgroup = pdev->info.lds_size_per_workgroup,
|
||||
},
|
||||
|
|
@ -1204,6 +1210,7 @@ radv_device_init_compiler_info(struct radv_device *device)
|
|||
.robust_buffer_access =
|
||||
(device->vk.enabled_features.robustBufferAccess2 || device->vk.enabled_features.robustBufferAccess),
|
||||
.force_aniso = device->force_aniso,
|
||||
.nggc_max_ps_params = nggc_max_ps_params,
|
||||
/* Wave/subgroup sizes */
|
||||
.subgroup_size = device->vk.physical->properties.subgroupSize,
|
||||
.min_subgroup_size = device->vk.physical->properties.minSubgroupSize,
|
||||
|
|
|
|||
|
|
@ -893,18 +893,9 @@ radv_consider_culling(const struct radv_compiler_info *compiler_info, struct nir
|
|||
if (!compiler_info->cache_key->use_ngg_culling)
|
||||
return false;
|
||||
|
||||
/* Shader based culling efficiency can depend on PS throughput.
|
||||
* Estimate an upper limit for PS input param count based on GPU info.
|
||||
*/
|
||||
unsigned max_ps_params = 8;
|
||||
|
||||
if (compiler_info->ac->gfx_level >= GFX10_3 && compiler_info->hw.has_dedicated_vram)
|
||||
max_ps_params = 12; /* GFX10.3 and newer discrete GPUs. */
|
||||
else if (compiler_info->ac->gfx_level == GFX10 && compiler_info->hw.has_dedicated_vram)
|
||||
max_ps_params = 12;
|
||||
|
||||
/* TODO: consider other heuristics here, such as PS execution time */
|
||||
if (util_bitcount64(ps_inputs_read) > max_ps_params)
|
||||
assert(compiler_info->nggc_max_ps_params);
|
||||
if (util_bitcount64(ps_inputs_read) > compiler_info->nggc_max_ps_params)
|
||||
return false;
|
||||
|
||||
/* Only triangle culling is supported. */
|
||||
|
|
|
|||
|
|
@ -519,7 +519,6 @@ struct radv_compiler_info {
|
|||
uint32_t address32_hi;
|
||||
bool rbplus_allowed;
|
||||
bool mesh_fast_launch_2;
|
||||
bool has_dedicated_vram;
|
||||
bool has_cs_regalloc_hang_bug;
|
||||
uint32_t lds_size_per_workgroup;
|
||||
} hw;
|
||||
|
|
@ -582,6 +581,7 @@ struct radv_compiler_info {
|
|||
bool force_vrs_enabled;
|
||||
bool robust_buffer_access; /* Only used by LLVM. */
|
||||
int force_aniso;
|
||||
uint8_t nggc_max_ps_params;
|
||||
|
||||
/* Wave/subgroup sizes */
|
||||
uint32_t subgroup_size;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue