From b1b581f855ff4ac93287708ec5337dfd6b502249 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 30 May 2025 06:21:10 -0400 Subject: [PATCH] ac/nir/lower_ngg: add an option not to export cull distances if the shader culls them MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Timur Kristóf Part-of: --- src/amd/common/nir/ac_nir.h | 6 ++++++ src/amd/common/nir/ac_nir_create_gs_copy_shader.c | 5 +++-- src/amd/common/nir/ac_nir_helpers.h | 1 + src/amd/common/nir/ac_nir_lower_legacy_vs.c | 5 +++-- src/amd/common/nir/ac_nir_lower_ngg.c | 1 + src/amd/common/nir/ac_nir_lower_ngg_gs.c | 1 + src/amd/common/nir/ac_nir_lower_ngg_mesh.c | 2 +- src/amd/common/nir/ac_nir_prerast_utils.c | 6 ++++++ 8 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/amd/common/nir/ac_nir.h b/src/amd/common/nir/ac_nir.h index b9857878cb7..47a30ab9d92 100644 --- a/src/amd/common/nir/ac_nir.h +++ b/src/amd/common/nir/ac_nir.h @@ -172,6 +172,12 @@ typedef struct { * either against CLIP_VERTEX or POS. */ uint8_t cull_clipdist_mask; + /* This skips exporting cull distances to increase throughput by reducing the number of pos exports. + * If this is set, cull_clipdist_mask must be set to cull against cull distances in the shader because + * the hw won't do it without the exports. The best case scenario is 100% increase in throughput + * (2 pos exports -> 1 pos export). + */ + bool dont_export_cull_distances; bool write_pos_to_clipvertex; /* Remove clip/cull distance components that are missing in export_clipdist_mask, improving * throughput by up to 50% (3 pos exports -> 2 pos exports). The caller shouldn't set no-op diff --git a/src/amd/common/nir/ac_nir_create_gs_copy_shader.c b/src/amd/common/nir/ac_nir_create_gs_copy_shader.c index 4a3e4397edc..44774bdb26c 100644 --- a/src/amd/common/nir/ac_nir_create_gs_copy_shader.c +++ b/src/amd/common/nir/ac_nir_create_gs_copy_shader.c @@ -120,8 +120,9 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, if (kill_layer) export_outputs &= ~VARYING_BIT_LAYER; - ac_nir_export_position(&b, gfx_level, export_clipdist_mask, write_pos_to_clipvertex, pack_clip_cull_distances, - !has_param_exports, force_vrs, export_outputs, &out, NULL); + ac_nir_export_position(&b, gfx_level, export_clipdist_mask, false, write_pos_to_clipvertex, + pack_clip_cull_distances, !has_param_exports, force_vrs, export_outputs, + &out, NULL); if (has_param_exports) { ac_nir_export_parameters(&b, param_offsets, diff --git a/src/amd/common/nir/ac_nir_helpers.h b/src/amd/common/nir/ac_nir_helpers.h index 590bae7bbcb..84c7f69ec8f 100644 --- a/src/amd/common/nir/ac_nir_helpers.h +++ b/src/amd/common/nir/ac_nir_helpers.h @@ -134,6 +134,7 @@ void ac_nir_export_position(nir_builder *b, enum amd_gfx_level gfx_level, uint32_t export_clipdist_mask, + bool dont_export_cull_distances, bool write_pos_to_clipvertex, bool pack_clip_cull_distances, bool no_param_export, diff --git a/src/amd/common/nir/ac_nir_lower_legacy_vs.c b/src/amd/common/nir/ac_nir_lower_legacy_vs.c index 75e1f7fc0e6..4cd0163c12f 100644 --- a/src/amd/common/nir/ac_nir_lower_legacy_vs.c +++ b/src/amd/common/nir/ac_nir_lower_legacy_vs.c @@ -78,8 +78,9 @@ ac_nir_lower_legacy_vs(nir_shader *nir, if (kill_layer) export_outputs &= ~VARYING_BIT_LAYER; - ac_nir_export_position(&b, gfx_level, export_clipdist_mask, write_pos_to_clipvertex, pack_clip_cull_distances, - !has_param_exports, force_vrs, export_outputs, &out, NULL); + ac_nir_export_position(&b, gfx_level, export_clipdist_mask, false, write_pos_to_clipvertex, + pack_clip_cull_distances, !has_param_exports, force_vrs, export_outputs, + &out, NULL); if (has_param_exports) { ac_nir_export_parameters(&b, param_offsets, diff --git a/src/amd/common/nir/ac_nir_lower_ngg.c b/src/amd/common/nir/ac_nir_lower_ngg.c index 139fa22c5bf..6ebbeaeb942 100644 --- a/src/amd/common/nir/ac_nir_lower_ngg.c +++ b/src/amd/common/nir/ac_nir_lower_ngg.c @@ -1746,6 +1746,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option ac_nir_export_position(b, options->hw_info->gfx_level, options->export_clipdist_mask, + options->dont_export_cull_distances, options->write_pos_to_clipvertex, options->pack_clip_cull_distances, !options->has_param_exports, diff --git a/src/amd/common/nir/ac_nir_lower_ngg_gs.c b/src/amd/common/nir/ac_nir_lower_ngg_gs.c index 18c6649dcac..1e7db649cd8 100644 --- a/src/amd/common/nir/ac_nir_lower_ngg_gs.c +++ b/src/amd/common/nir/ac_nir_lower_ngg_gs.c @@ -469,6 +469,7 @@ ngg_gs_emit_output(nir_builder *b, nir_def *max_num_out_vtx, nir_def *max_num_ou ac_nir_export_position(b, s->options->hw_info->gfx_level, s->options->export_clipdist_mask, + s->options->dont_export_cull_distances, s->options->write_pos_to_clipvertex, s->options->pack_clip_cull_distances, !s->options->has_param_exports, diff --git a/src/amd/common/nir/ac_nir_lower_ngg_mesh.c b/src/amd/common/nir/ac_nir_lower_ngg_mesh.c index 0aec9840c9f..a6c212a2965 100644 --- a/src/amd/common/nir/ac_nir_lower_ngg_mesh.c +++ b/src/amd/common/nir/ac_nir_lower_ngg_mesh.c @@ -887,7 +887,7 @@ emit_ms_vertex(nir_builder *b, nir_def *index, nir_def *row, bool exports, bool ms_emit_arrayed_outputs(b, index, per_vertex_outputs, s); if (exports) { - ac_nir_export_position(b, s->hw_info->gfx_level, s->clipdist_enable_mask, false, false, + ac_nir_export_position(b, s->hw_info->gfx_level, s->clipdist_enable_mask, false, false, false, !s->has_param_exports, false, s->per_vertex_outputs | VARYING_BIT_POS, &s->out, row); } diff --git a/src/amd/common/nir/ac_nir_prerast_utils.c b/src/amd/common/nir/ac_nir_prerast_utils.c index 9c2188b96f1..fc26f4d7b1f 100644 --- a/src/amd/common/nir/ac_nir_prerast_utils.c +++ b/src/amd/common/nir/ac_nir_prerast_utils.c @@ -290,6 +290,7 @@ void ac_nir_export_position(nir_builder *b, enum amd_gfx_level gfx_level, uint32_t export_clipdist_mask, + bool dont_export_cull_distances, bool write_pos_to_clipvertex, bool pack_clip_cull_distances, bool no_param_export, @@ -302,6 +303,11 @@ ac_nir_export_position(nir_builder *b, unsigned exp_num = 0; unsigned exp_pos_offset = 0; + if (dont_export_cull_distances) { + export_clipdist_mask &= ~BITFIELD_RANGE(b->shader->info.clip_distance_array_size, + b->shader->info.cull_distance_array_size); + } + uint64_t mask = VARYING_BIT_PSIZ | VARYING_BIT_EDGE |