From 39a9dce5fcd6df2a1ba395d703965fd6ce334aa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 26 May 2025 08:11:52 -0400 Subject: [PATCH] ac/nir: add an option to pack clip/cull distance components to remove holes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Timur Kristóf Part-of: --- src/amd/common/nir/ac_nir.h | 8 ++++++++ src/amd/common/nir/ac_nir_create_gs_copy_shader.c | 3 ++- src/amd/common/nir/ac_nir_helpers.h | 1 + src/amd/common/nir/ac_nir_lower_legacy_vs.c | 3 ++- src/amd/common/nir/ac_nir_lower_ngg.c | 1 + src/amd/common/nir/ac_nir_lower_ngg_gs.c | 1 + src/amd/common/nir/ac_nir_lower_ngg_mesh.c | 2 +- src/amd/common/nir/ac_nir_prerast_utils.c | 11 +++++++++++ src/amd/vulkan/radv_pipeline.c | 2 +- src/amd/vulkan/radv_pipeline_graphics.c | 2 +- src/gallium/drivers/radeonsi/si_shader.c | 4 ++-- 11 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/amd/common/nir/ac_nir.h b/src/amd/common/nir/ac_nir.h index 0c69c6026e9..ea9c09ef858 100644 --- a/src/amd/common/nir/ac_nir.h +++ b/src/amd/common/nir/ac_nir.h @@ -167,6 +167,12 @@ typedef struct { unsigned wave_size; uint8_t clip_cull_dist_mask; bool write_pos_to_clipvertex; + /* Remove clip/cull distance components that are missing in clip_cull_dist_mask, improving + * throughput by up to 50% (3 pos exports -> 2 pos exports). The caller shouldn't set no-op + * components (>= 0) in clip_cull_dist_mask to remove those completely. No-op components + * should be determined by nir_opt_clip_cull_const before this. + */ + bool pack_clip_cull_distances; const uint8_t *vs_output_param_offset; /* GFX11+ */ bool has_param_exports; bool can_cull; @@ -258,6 +264,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, enum amd_gfx_level gfx_level, uint32_t clip_cull_mask, bool write_pos_to_clipvertex, + bool pack_clip_cull_distances, const uint8_t *param_offsets, bool has_param_exports, bool disable_streamout, @@ -271,6 +278,7 @@ ac_nir_lower_legacy_vs(nir_shader *nir, enum amd_gfx_level gfx_level, uint32_t clip_cull_mask, bool write_pos_to_clipvertex, + bool pack_clip_cull_distances, const uint8_t *param_offsets, bool has_param_exports, bool export_primitive_id, diff --git a/src/amd/common/nir/ac_nir_create_gs_copy_shader.c b/src/amd/common/nir/ac_nir_create_gs_copy_shader.c index 02947aff4b2..813b990c557 100644 --- a/src/amd/common/nir/ac_nir_create_gs_copy_shader.c +++ b/src/amd/common/nir/ac_nir_create_gs_copy_shader.c @@ -15,6 +15,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, enum amd_gfx_level gfx_level, uint32_t clip_cull_mask, bool write_pos_to_clipvertex, + bool pack_clip_cull_distances, const uint8_t *param_offsets, bool has_param_exports, bool disable_streamout, @@ -119,7 +120,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, if (kill_layer) export_outputs &= ~VARYING_BIT_LAYER; - ac_nir_export_position(&b, gfx_level, clip_cull_mask, write_pos_to_clipvertex, + ac_nir_export_position(&b, gfx_level, clip_cull_mask, write_pos_to_clipvertex, pack_clip_cull_distances, !has_param_exports, force_vrs, export_outputs, &out, NULL); if (has_param_exports) { diff --git a/src/amd/common/nir/ac_nir_helpers.h b/src/amd/common/nir/ac_nir_helpers.h index 2665a0ab746..aa0bc03f10c 100644 --- a/src/amd/common/nir/ac_nir_helpers.h +++ b/src/amd/common/nir/ac_nir_helpers.h @@ -111,6 +111,7 @@ ac_nir_export_position(nir_builder *b, enum amd_gfx_level gfx_level, uint32_t clip_cull_mask, bool write_pos_to_clipvertex, + bool pack_clip_cull_distances, bool no_param_export, bool force_vrs, uint64_t outputs_written, diff --git a/src/amd/common/nir/ac_nir_lower_legacy_vs.c b/src/amd/common/nir/ac_nir_lower_legacy_vs.c index 91b5be12049..0a5425b24f0 100644 --- a/src/amd/common/nir/ac_nir_lower_legacy_vs.c +++ b/src/amd/common/nir/ac_nir_lower_legacy_vs.c @@ -37,6 +37,7 @@ ac_nir_lower_legacy_vs(nir_shader *nir, enum amd_gfx_level gfx_level, uint32_t clip_cull_mask, bool write_pos_to_clipvertex, + bool pack_clip_cull_distances, const uint8_t *param_offsets, bool has_param_exports, bool export_primitive_id, @@ -77,7 +78,7 @@ ac_nir_lower_legacy_vs(nir_shader *nir, if (kill_layer) export_outputs &= ~VARYING_BIT_LAYER; - ac_nir_export_position(&b, gfx_level, clip_cull_mask, write_pos_to_clipvertex, + ac_nir_export_position(&b, gfx_level, clip_cull_mask, write_pos_to_clipvertex, pack_clip_cull_distances, !has_param_exports, force_vrs, export_outputs, &out, NULL); if (has_param_exports) { diff --git a/src/amd/common/nir/ac_nir_lower_ngg.c b/src/amd/common/nir/ac_nir_lower_ngg.c index ebef309ef04..ed0b1f7d138 100644 --- a/src/amd/common/nir/ac_nir_lower_ngg.c +++ b/src/amd/common/nir/ac_nir_lower_ngg.c @@ -1788,6 +1788,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option ac_nir_export_position(b, options->hw_info->gfx_level, options->clip_cull_dist_mask, options->write_pos_to_clipvertex, + options->pack_clip_cull_distances, !options->has_param_exports, options->force_vrs, export_outputs, &state.out, NULL); diff --git a/src/amd/common/nir/ac_nir_lower_ngg_gs.c b/src/amd/common/nir/ac_nir_lower_ngg_gs.c index 29166dfdc34..c63dad543df 100644 --- a/src/amd/common/nir/ac_nir_lower_ngg_gs.c +++ b/src/amd/common/nir/ac_nir_lower_ngg_gs.c @@ -500,6 +500,7 @@ ngg_gs_emit_output(nir_builder *b, nir_def *max_num_out_vtx, nir_def *max_num_ou ac_nir_export_position(b, s->options->hw_info->gfx_level, s->options->clip_cull_dist_mask, s->options->write_pos_to_clipvertex, + s->options->pack_clip_cull_distances, !s->options->has_param_exports, s->options->force_vrs, export_outputs, &s->out, NULL); diff --git a/src/amd/common/nir/ac_nir_lower_ngg_mesh.c b/src/amd/common/nir/ac_nir_lower_ngg_mesh.c index fe15f3f5563..2eb83500457 100644 --- a/src/amd/common/nir/ac_nir_lower_ngg_mesh.c +++ b/src/amd/common/nir/ac_nir_lower_ngg_mesh.c @@ -887,7 +887,7 @@ emit_ms_vertex(nir_builder *b, nir_def *index, nir_def *row, bool exports, bool ms_emit_arrayed_outputs(b, index, per_vertex_outputs, s); if (exports) { - ac_nir_export_position(b, s->hw_info->gfx_level, s->clipdist_enable_mask, false, + ac_nir_export_position(b, s->hw_info->gfx_level, s->clipdist_enable_mask, false, false, !s->has_param_exports, false, s->per_vertex_outputs | VARYING_BIT_POS, &s->out, row); } diff --git a/src/amd/common/nir/ac_nir_prerast_utils.c b/src/amd/common/nir/ac_nir_prerast_utils.c index 7f912be4004..e4cc8e397ce 100644 --- a/src/amd/common/nir/ac_nir_prerast_utils.c +++ b/src/amd/common/nir/ac_nir_prerast_utils.c @@ -229,6 +229,7 @@ ac_nir_export_position(nir_builder *b, enum amd_gfx_level gfx_level, uint32_t clip_cull_mask, bool write_pos_to_clipvertex, + bool pack_clip_cull_distances, bool no_param_export, bool force_vrs, uint64_t outputs_written, @@ -280,6 +281,16 @@ ac_nir_export_position(nir_builder *b, } } + /* If clip/cull distances are sparsely populated or some components are >= 0, pack them. */ + if (pack_clip_cull_distances) { + unsigned num = 0; + + u_foreach_bit(i, clip_cull_mask) { + clip_dist[num++] = clip_dist[i]; + } + clip_cull_mask = BITFIELD_MASK(num); + } + if (outputs_written & VARYING_BIT_POS) { /* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang. * Setting valid_mask=1 prevents it and has no other effect. diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index ec75dc4308f..82bdc16f5b3 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -444,7 +444,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat } else if (is_last_vgt_stage) { if (stage->stage != MESA_SHADER_GEOMETRY) { NIR_PASS(_, stage->nir, ac_nir_lower_legacy_vs, gfx_level, - stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask, false, + stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask, false, false, stage->info.outinfo.vs_output_param_offset, stage->info.outinfo.param_exports, stage->info.outinfo.export_prim_id, false, false, false, stage->info.force_vrs_per_vertex); diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 64843863e81..22fdde55444 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -2272,7 +2272,7 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache }; nir_shader *nir = ac_nir_create_gs_copy_shader( gs_stage->nir, pdev->info.gfx_level, gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask, false, - gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, false, + false, gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, false, gs_info->force_vrs_per_vertex, &output_info); nir->info.internal = true; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 9d0b77b4808..3b7b1db702d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1601,7 +1601,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx * NIR_PASS_V(nir, ac_nir_lower_legacy_vs, sel->screen->info.gfx_level, clip_cull_mask, - false, + false, false, ctx->temp_info.vs_output_param_offset, shader->info.nr_param_exports, shader->key.ge.mono.u.vs_export_prim_id, @@ -1919,7 +1919,7 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen, ac_nir_create_gs_copy_shader(gs_nir, sscreen->info.gfx_level, clip_cull_mask, - false, + false, false, temp_info->vs_output_param_offset, shader->info.nr_param_exports, !gs_shader->info.num_streamout_vec4s,