radv: don't export cull distances if the shader culls against them

This increases primitive throughput on all hardware with NGG when the
shader culls and removing the cull distances reduces the number of
position exports.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35473>
This commit is contained in:
Marek Olšák 2025-07-06 21:31:25 -04:00 committed by Marge Bot
parent 0cce0505cc
commit bdcfe15457
2 changed files with 13 additions and 4 deletions

View file

@@ -156,8 +156,16 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
*/
nir_def *small_workgroup = nir_ieq_imm(b, nir_iand_imm(b, gs_tg_info, BITFIELD_RANGE(22 + 4, 9 - 4)), 0);
/* If clip or cull distances are present, always cull against them if the workgroup is large enough. */
if (b->shader->info.clip_distance_array_size || b->shader->info.cull_distance_array_size) {
if (b->shader->info.cull_distance_array_size) {
/* If cull distances are present, always cull in the shader. We don't export them in order to increase
* primitive throughput.
*/
replacement = nir_imm_true(b);
break;
}
if (b->shader->info.clip_distance_array_size) {
/* If clip distances are present, cull in the shader only when the workgroup is large enough. */
replacement = nir_inot(b, small_workgroup);
break;
}

View file

@@ -791,6 +791,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
options.wave_size = info->wave_size;
options.export_clipdist_mask = info->outinfo.clip_dist_mask | info->outinfo.cull_dist_mask;
options.cull_clipdist_mask = options.export_clipdist_mask;
options.dont_export_cull_distances = info->has_ngg_culling;
options.vs_output_param_offset = info->outinfo.vs_output_param_offset;
options.has_param_exports = info->outinfo.param_exports || info->outinfo.prim_param_exports;
options.can_cull = info->has_ngg_culling;
@@ -1435,7 +1436,7 @@ radv_get_num_pos_exports(struct radv_shader_info *info)
info->outinfo.writes_primitive_shading_rate)
num++;
unsigned clip_cull_mask = info->outinfo.clip_dist_mask | info->outinfo.cull_dist_mask;
unsigned clip_cull_mask = info->outinfo.clip_dist_mask | (info->has_ngg_culling ? 0 : info->outinfo.cull_dist_mask);
if (clip_cull_mask & 0x0f)
num++;
@@ -1636,7 +1637,7 @@ radv_precompute_registers_hw_ngg(struct radv_device *device, const struct ac_sha
const bool misc_vec_ena = info->outinfo.writes_pointsize || info->outinfo.writes_layer ||
info->outinfo.writes_viewport_index || info->outinfo.writes_primitive_shading_rate;
const unsigned clip_dist_mask = info->outinfo.clip_dist_mask;
const unsigned cull_dist_mask = info->outinfo.cull_dist_mask;
const unsigned cull_dist_mask = info->has_ngg_culling ? 0 : info->outinfo.cull_dist_mask;
const unsigned total_mask = clip_dist_mask | cull_dist_mask;
info->regs.pa_cl_vs_out_cntl =