ac/nir: add an option to pack clip/cull distance components to remove holes

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35351>
This commit is contained in:
Marek Olšák 2025-05-26 08:11:52 -04:00 committed by Marge Bot
parent 6cd813810e
commit 39a9dce5fc
11 changed files with 31 additions and 7 deletions

View file

@ -167,6 +167,12 @@ typedef struct {
unsigned wave_size;
uint8_t clip_cull_dist_mask;
bool write_pos_to_clipvertex;
/* Remove clip/cull distance components that are missing in clip_cull_dist_mask, improving
* throughput by up to 50% (3 pos exports -> 2 pos exports). To remove no-op components
* (>= 0) completely, the caller should leave their bits unset in clip_cull_dist_mask.
* No-op components should be determined by nir_opt_clip_cull_const before this.
*/
bool pack_clip_cull_distances;
const uint8_t *vs_output_param_offset; /* GFX11+ */
bool has_param_exports;
bool can_cull;
@ -258,6 +264,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
enum amd_gfx_level gfx_level,
uint32_t clip_cull_mask,
bool write_pos_to_clipvertex,
bool pack_clip_cull_distances,
const uint8_t *param_offsets,
bool has_param_exports,
bool disable_streamout,
@ -271,6 +278,7 @@ ac_nir_lower_legacy_vs(nir_shader *nir,
enum amd_gfx_level gfx_level,
uint32_t clip_cull_mask,
bool write_pos_to_clipvertex,
bool pack_clip_cull_distances,
const uint8_t *param_offsets,
bool has_param_exports,
bool export_primitive_id,

View file

@ -15,6 +15,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
enum amd_gfx_level gfx_level,
uint32_t clip_cull_mask,
bool write_pos_to_clipvertex,
bool pack_clip_cull_distances,
const uint8_t *param_offsets,
bool has_param_exports,
bool disable_streamout,
@ -119,7 +120,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
if (kill_layer)
export_outputs &= ~VARYING_BIT_LAYER;
ac_nir_export_position(&b, gfx_level, clip_cull_mask, write_pos_to_clipvertex,
ac_nir_export_position(&b, gfx_level, clip_cull_mask, write_pos_to_clipvertex, pack_clip_cull_distances,
!has_param_exports, force_vrs, export_outputs, &out, NULL);
if (has_param_exports) {

View file

@ -111,6 +111,7 @@ ac_nir_export_position(nir_builder *b,
enum amd_gfx_level gfx_level,
uint32_t clip_cull_mask,
bool write_pos_to_clipvertex,
bool pack_clip_cull_distances,
bool no_param_export,
bool force_vrs,
uint64_t outputs_written,

View file

@ -37,6 +37,7 @@ ac_nir_lower_legacy_vs(nir_shader *nir,
enum amd_gfx_level gfx_level,
uint32_t clip_cull_mask,
bool write_pos_to_clipvertex,
bool pack_clip_cull_distances,
const uint8_t *param_offsets,
bool has_param_exports,
bool export_primitive_id,
@ -77,7 +78,7 @@ ac_nir_lower_legacy_vs(nir_shader *nir,
if (kill_layer)
export_outputs &= ~VARYING_BIT_LAYER;
ac_nir_export_position(&b, gfx_level, clip_cull_mask, write_pos_to_clipvertex,
ac_nir_export_position(&b, gfx_level, clip_cull_mask, write_pos_to_clipvertex, pack_clip_cull_distances,
!has_param_exports, force_vrs, export_outputs, &out, NULL);
if (has_param_exports) {

View file

@ -1788,6 +1788,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
ac_nir_export_position(b, options->hw_info->gfx_level,
options->clip_cull_dist_mask,
options->write_pos_to_clipvertex,
options->pack_clip_cull_distances,
!options->has_param_exports,
options->force_vrs,
export_outputs, &state.out, NULL);

View file

@ -500,6 +500,7 @@ ngg_gs_emit_output(nir_builder *b, nir_def *max_num_out_vtx, nir_def *max_num_ou
ac_nir_export_position(b, s->options->hw_info->gfx_level,
s->options->clip_cull_dist_mask,
s->options->write_pos_to_clipvertex,
s->options->pack_clip_cull_distances,
!s->options->has_param_exports,
s->options->force_vrs,
export_outputs, &s->out, NULL);

View file

@ -887,7 +887,7 @@ emit_ms_vertex(nir_builder *b, nir_def *index, nir_def *row, bool exports, bool
ms_emit_arrayed_outputs(b, index, per_vertex_outputs, s);
if (exports) {
ac_nir_export_position(b, s->hw_info->gfx_level, s->clipdist_enable_mask, false,
ac_nir_export_position(b, s->hw_info->gfx_level, s->clipdist_enable_mask, false, false,
!s->has_param_exports, false,
s->per_vertex_outputs | VARYING_BIT_POS, &s->out, row);
}

View file

@ -229,6 +229,7 @@ ac_nir_export_position(nir_builder *b,
enum amd_gfx_level gfx_level,
uint32_t clip_cull_mask,
bool write_pos_to_clipvertex,
bool pack_clip_cull_distances,
bool no_param_export,
bool force_vrs,
uint64_t outputs_written,
@ -280,6 +281,16 @@ ac_nir_export_position(nir_builder *b,
}
}
/* If requested, pack the components enabled in clip_cull_mask into the lowest slots, removing
* holes left by components that are unused or were dropped by the caller as no-ops (>= 0).
*/
if (pack_clip_cull_distances) {
unsigned num = 0;
u_foreach_bit(i, clip_cull_mask) {
clip_dist[num++] = clip_dist[i];
}
clip_cull_mask = BITFIELD_MASK(num);
}
if (outputs_written & VARYING_BIT_POS) {
/* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
* Setting valid_mask=1 prevents it and has no other effect.

View file

@ -444,7 +444,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
} else if (is_last_vgt_stage) {
if (stage->stage != MESA_SHADER_GEOMETRY) {
NIR_PASS(_, stage->nir, ac_nir_lower_legacy_vs, gfx_level,
stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask, false,
stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask, false, false,
stage->info.outinfo.vs_output_param_offset, stage->info.outinfo.param_exports,
stage->info.outinfo.export_prim_id, false, false, false, stage->info.force_vrs_per_vertex);

View file

@ -2272,7 +2272,7 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
};
nir_shader *nir = ac_nir_create_gs_copy_shader(
gs_stage->nir, pdev->info.gfx_level, gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask, false,
gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, false,
false, gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, false,
gs_info->force_vrs_per_vertex, &output_info);
nir->info.internal = true;

View file

@ -1601,7 +1601,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
NIR_PASS_V(nir, ac_nir_lower_legacy_vs,
sel->screen->info.gfx_level,
clip_cull_mask,
false,
false, false,
ctx->temp_info.vs_output_param_offset,
shader->info.nr_param_exports,
shader->key.ge.mono.u.vs_export_prim_id,
@ -1919,7 +1919,7 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen,
ac_nir_create_gs_copy_shader(gs_nir,
sscreen->info.gfx_level,
clip_cull_mask,
false,
false, false,
temp_info->vs_output_param_offset,
shader->info.nr_param_exports,
!gs_shader->info.num_streamout_vec4s,