ac/nir/lower_ngg: add an option not to export cull distances if the shader culls them

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35352>
This commit is contained in:
Marek Olšák 2025-05-30 06:21:10 -04:00 committed by Marge Bot
parent 8c04a91d12
commit b1b581f855
8 changed files with 22 additions and 5 deletions

View file

@ -172,6 +172,12 @@ typedef struct {
* either against CLIP_VERTEX or POS.
*/
uint8_t cull_clipdist_mask;
/* Skip exporting cull distances, which increases throughput by reducing the number of pos exports.
* If this is set, cull_clipdist_mask must also be set so that the shader itself culls against the
* cull distances, because the hardware can't cull against them without the exports. In the best
* case this gives a 100% increase in throughput (2 pos exports -> 1 pos export).
*/
bool dont_export_cull_distances;
bool write_pos_to_clipvertex;
/* Remove clip/cull distance components that are missing in export_clipdist_mask, improving
* throughput by up to 50% (3 pos exports -> 2 pos exports). The caller shouldn't set no-op

View file

@ -120,8 +120,9 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
if (kill_layer)
export_outputs &= ~VARYING_BIT_LAYER;
ac_nir_export_position(&b, gfx_level, export_clipdist_mask, write_pos_to_clipvertex, pack_clip_cull_distances,
!has_param_exports, force_vrs, export_outputs, &out, NULL);
ac_nir_export_position(&b, gfx_level, export_clipdist_mask, false, write_pos_to_clipvertex,
pack_clip_cull_distances, !has_param_exports, force_vrs, export_outputs,
&out, NULL);
if (has_param_exports) {
ac_nir_export_parameters(&b, param_offsets,

View file

@ -134,6 +134,7 @@ void
ac_nir_export_position(nir_builder *b,
enum amd_gfx_level gfx_level,
uint32_t export_clipdist_mask,
bool dont_export_cull_distances,
bool write_pos_to_clipvertex,
bool pack_clip_cull_distances,
bool no_param_export,

View file

@ -78,8 +78,9 @@ ac_nir_lower_legacy_vs(nir_shader *nir,
if (kill_layer)
export_outputs &= ~VARYING_BIT_LAYER;
ac_nir_export_position(&b, gfx_level, export_clipdist_mask, write_pos_to_clipvertex, pack_clip_cull_distances,
!has_param_exports, force_vrs, export_outputs, &out, NULL);
ac_nir_export_position(&b, gfx_level, export_clipdist_mask, false, write_pos_to_clipvertex,
pack_clip_cull_distances, !has_param_exports, force_vrs, export_outputs,
&out, NULL);
if (has_param_exports) {
ac_nir_export_parameters(&b, param_offsets,

View file

@ -1746,6 +1746,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
ac_nir_export_position(b, options->hw_info->gfx_level,
options->export_clipdist_mask,
options->dont_export_cull_distances,
options->write_pos_to_clipvertex,
options->pack_clip_cull_distances,
!options->has_param_exports,

View file

@ -469,6 +469,7 @@ ngg_gs_emit_output(nir_builder *b, nir_def *max_num_out_vtx, nir_def *max_num_ou
ac_nir_export_position(b, s->options->hw_info->gfx_level,
s->options->export_clipdist_mask,
s->options->dont_export_cull_distances,
s->options->write_pos_to_clipvertex,
s->options->pack_clip_cull_distances,
!s->options->has_param_exports,

View file

@ -887,7 +887,7 @@ emit_ms_vertex(nir_builder *b, nir_def *index, nir_def *row, bool exports, bool
ms_emit_arrayed_outputs(b, index, per_vertex_outputs, s);
if (exports) {
ac_nir_export_position(b, s->hw_info->gfx_level, s->clipdist_enable_mask, false, false,
ac_nir_export_position(b, s->hw_info->gfx_level, s->clipdist_enable_mask, false, false, false,
!s->has_param_exports, false,
s->per_vertex_outputs | VARYING_BIT_POS, &s->out, row);
}

View file

@ -290,6 +290,7 @@ void
ac_nir_export_position(nir_builder *b,
enum amd_gfx_level gfx_level,
uint32_t export_clipdist_mask,
bool dont_export_cull_distances,
bool write_pos_to_clipvertex,
bool pack_clip_cull_distances,
bool no_param_export,
@ -302,6 +303,11 @@ ac_nir_export_position(nir_builder *b,
unsigned exp_num = 0;
unsigned exp_pos_offset = 0;
if (dont_export_cull_distances) {
export_clipdist_mask &= ~BITFIELD_RANGE(b->shader->info.clip_distance_array_size,
b->shader->info.cull_distance_array_size);
}
uint64_t mask =
VARYING_BIT_PSIZ |
VARYING_BIT_EDGE |