diff --git a/src/amd/common/nir/ac_nir.h b/src/amd/common/nir/ac_nir.h index c9cada9aa00..681f86f1529 100644 --- a/src/amd/common/nir/ac_nir.h +++ b/src/amd/common/nir/ac_nir.h @@ -165,7 +165,13 @@ typedef struct { unsigned max_workgroup_size; unsigned wave_size; - /* The mask of clip and cull distances that the shader should export. */ + /* The mask of clip and cull distances that the shader should export. + * + * Clip/cull distance components that are missing in export_clipdist_mask are removed, improving + * throughput by up to 50% (3 pos exports -> 2 pos exports). The caller shouldn't set no-op + * components (>= 0) in export_clipdist_mask to remove those completely. No-op components + * should be determined by nir_opt_clip_cull_const before this. + */ uint8_t export_clipdist_mask; /* The mask of clip and cull distances that the shader should cull against. * If no clip and cull distance outputs are present, it will load clip planes and cull @@ -179,12 +185,6 @@ typedef struct { */ bool dont_export_cull_distances; bool write_pos_to_clipvertex; - /* Remove clip/cull distance components that are missing in export_clipdist_mask, improving - * throughput by up to 50% (3 pos exports -> 2 pos exports). The caller shouldn't set no-op - * components (>= 0) in export_clipdist_mask to remove those completely. No-op components - * should be determined by nir_opt_clip_cull_const before this. - */ - bool pack_clip_cull_distances; const uint8_t *vs_output_param_offset; /* GFX11+ */ bool has_param_exports; bool can_cull; @@ -255,7 +255,6 @@ ac_nir_lower_legacy_vs(nir_shader *nir, enum amd_gfx_level gfx_level, uint32_t export_clipdist_mask, bool write_pos_to_clipvertex, - bool pack_clip_cull_distances, const uint8_t *param_offsets, bool has_param_exports, bool export_primitive_id, @@ -269,7 +268,6 @@ typedef struct { enum amd_gfx_level gfx_level; uint32_t export_clipdist_mask; bool write_pos_to_clipvertex; - bool pack_clip_cull_distances; const uint8_t *param_offsets; bool has_param_exports; bool disable_streamout; diff --git a/src/amd/common/nir/ac_nir_create_gs_copy_shader.c b/src/amd/common/nir/ac_nir_create_gs_copy_shader.c index a7b86d00056..e80e7f09bd9 100644 --- a/src/amd/common/nir/ac_nir_create_gs_copy_shader.c +++ b/src/amd/common/nir/ac_nir_create_gs_copy_shader.c @@ -67,9 +67,8 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_op if (stream == 0) { ac_nir_export_position(&b, options->gfx_level, options->export_clipdist_mask, false, - options->write_pos_to_clipvertex, options->pack_clip_cull_distances, - !options->has_param_exports, options->force_vrs, - b.shader->info.outputs_written | VARYING_BIT_POS, + options->write_pos_to_clipvertex, !options->has_param_exports, + options->force_vrs, b.shader->info.outputs_written | VARYING_BIT_POS, out, NULL); if (options->has_param_exports) { diff --git a/src/amd/common/nir/ac_nir_helpers.h b/src/amd/common/nir/ac_nir_helpers.h index 692af4477b5..7ecb70053ae 100644 --- a/src/amd/common/nir/ac_nir_helpers.h +++ b/src/amd/common/nir/ac_nir_helpers.h @@ -128,7 +128,6 @@ ac_nir_export_position(nir_builder *b, uint32_t export_clipdist_mask, bool dont_export_cull_distances, bool write_pos_to_clipvertex, - bool pack_clip_cull_distances, bool no_param_export, bool force_vrs, uint64_t outputs_written, diff --git a/src/amd/common/nir/ac_nir_lower_legacy_vs.c b/src/amd/common/nir/ac_nir_lower_legacy_vs.c index 4b8aa96bde1..aadb1b21e30 100644 --- a/src/amd/common/nir/ac_nir_lower_legacy_vs.c +++ b/src/amd/common/nir/ac_nir_lower_legacy_vs.c @@ -37,7 +37,6 @@ ac_nir_lower_legacy_vs(nir_shader *nir, enum amd_gfx_level gfx_level, uint32_t export_clipdist_mask, bool write_pos_to_clipvertex, - bool pack_clip_cull_distances, const uint8_t *param_offsets, bool has_param_exports, bool export_primitive_id, @@ -71,8 +70,7 @@ ac_nir_lower_legacy_vs(nir_shader *nir, ac_nir_clamp_vertex_color_outputs(&b, &out); ac_nir_export_position(&b, gfx_level, export_clipdist_mask, false, write_pos_to_clipvertex, - pack_clip_cull_distances, !has_param_exports, force_vrs, - nir->info.outputs_written | VARYING_BIT_POS, + !has_param_exports, force_vrs, nir->info.outputs_written | VARYING_BIT_POS, &out, NULL); if (has_param_exports) { diff --git a/src/amd/common/nir/ac_nir_lower_ngg.c b/src/amd/common/nir/ac_nir_lower_ngg.c index 44889b7e8d6..3b2e9b82cfa 100644 --- a/src/amd/common/nir/ac_nir_lower_ngg.c +++ b/src/amd/common/nir/ac_nir_lower_ngg.c @@ -1716,7 +1716,6 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option options->export_clipdist_mask, options->dont_export_cull_distances, options->write_pos_to_clipvertex, - options->pack_clip_cull_distances, !options->has_param_exports, options->force_vrs, export_outputs, &state.out, NULL); diff --git a/src/amd/common/nir/ac_nir_lower_ngg_gs.c b/src/amd/common/nir/ac_nir_lower_ngg_gs.c index d8b287bd483..6ebaad4b304 100644 --- a/src/amd/common/nir/ac_nir_lower_ngg_gs.c +++ b/src/amd/common/nir/ac_nir_lower_ngg_gs.c @@ -406,7 +406,6 @@ ngg_gs_emit_output(nir_builder *b, nir_def *max_num_out_vtx, nir_def *max_num_ou s->options->export_clipdist_mask, s->options->dont_export_cull_distances, s->options->write_pos_to_clipvertex, - s->options->pack_clip_cull_distances, !s->options->has_param_exports, s->options->force_vrs, b->shader->info.outputs_written | VARYING_BIT_POS, &s->out, NULL); diff --git a/src/amd/common/nir/ac_nir_lower_ngg_mesh.c b/src/amd/common/nir/ac_nir_lower_ngg_mesh.c index 992a099ac91..7ad00be1f82 100644 --- a/src/amd/common/nir/ac_nir_lower_ngg_mesh.c +++ b/src/amd/common/nir/ac_nir_lower_ngg_mesh.c @@ -887,7 +887,7 @@ emit_ms_vertex(nir_builder *b, nir_def *index, nir_def *row, bool exports, bool ms_emit_arrayed_outputs(b, index, per_vertex_outputs, s); if (exports) { - ac_nir_export_position(b, s->hw_info->gfx_level, s->clipdist_enable_mask, false, false, true, + ac_nir_export_position(b, s->hw_info->gfx_level, s->clipdist_enable_mask, false, false, !s->has_param_exports, false, s->per_vertex_outputs | VARYING_BIT_POS, &s->out, row); } diff --git a/src/amd/common/nir/ac_nir_prerast_utils.c b/src/amd/common/nir/ac_nir_prerast_utils.c index 73ea9cb5bed..e15cdc43add 100644 --- a/src/amd/common/nir/ac_nir_prerast_utils.c +++ b/src/amd/common/nir/ac_nir_prerast_utils.c @@ -268,7 +268,6 @@ ac_nir_export_position(nir_builder *b, uint32_t export_clipdist_mask, bool dont_export_cull_distances, bool write_pos_to_clipvertex, - bool pack_clip_cull_distances, bool no_param_export, bool force_vrs, uint64_t outputs_written, @@ -326,14 +325,11 @@ ac_nir_export_position(nir_builder *b, } /* If clip/cull distances are sparsely populated or some components are >= 0, pack them. */ - if (pack_clip_cull_distances) { - unsigned num = 0; - - u_foreach_bit(i, export_clipdist_mask) { - clip_dist[num++] = clip_dist[i]; - } - export_clipdist_mask = BITFIELD_MASK(num); + unsigned num = 0; + u_foreach_bit(i, export_clipdist_mask) { + clip_dist[num++] = clip_dist[i]; } + export_clipdist_mask = BITFIELD_MASK(num); if (outputs_written & VARYING_BIT_POS) { /* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang. diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 1349b243891..0fdc37a850e 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -489,7 +489,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat } else if (is_last_vgt_stage) { if (stage->stage != MESA_SHADER_GEOMETRY) { NIR_PASS(_, stage->nir, ac_nir_lower_legacy_vs, gfx_level, - stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask, false, true, + stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask, false, stage->info.outinfo.vs_output_param_offset, stage->info.outinfo.param_exports, stage->info.outinfo.export_prim_id, false, stage->info.force_vrs_per_vertex); @@ -499,7 +499,6 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat .has_pipeline_stats_query = false, .gfx_level = pdev->info.gfx_level, .export_clipdist_mask = stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask, - .pack_clip_cull_distances = true, .param_offsets = stage->info.outinfo.vs_output_param_offset, .has_param_exports = stage->info.outinfo.param_exports, .force_vrs = stage->info.force_vrs_per_vertex, diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 3d3b6cbcd58..0c48ea7532a 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -792,7 +792,6 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage, options.export_clipdist_mask = info->outinfo.clip_dist_mask | info->outinfo.cull_dist_mask; options.cull_clipdist_mask = options.export_clipdist_mask; options.dont_export_cull_distances = info->has_ngg_culling; - options.pack_clip_cull_distances = true; options.vs_output_param_offset = info->outinfo.vs_output_param_offset; options.has_param_exports = info->outinfo.param_exports || info->outinfo.prim_param_exports; options.can_cull = info->has_ngg_culling; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 2275b46ce54..a0ba9fdc960 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1129,7 +1129,6 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir, shader->info.culldist_mask : 0, .dont_export_cull_distances = si_shader_culling_enabled(shader), .write_pos_to_clipvertex = shader->key.ge.mono.write_pos_to_clipvertex, - .pack_clip_cull_distances = true, .force_vrs = sel->screen->options.vrs2x2, .use_gfx12_xfb_intrinsic = !nir->info.use_aco_amd, .skip_viewport_state_culling = sel->info.writes_viewport_index, @@ -1542,7 +1541,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx * NIR_PASS(_, nir, ac_nir_lower_legacy_vs, sel->screen->info.gfx_level, shader->info.clipdist_mask | shader->info.culldist_mask, - shader->key.ge.mono.write_pos_to_clipvertex, true, + shader->key.ge.mono.write_pos_to_clipvertex, ctx->temp_info.vs_output_param_offset, shader->info.nr_param_exports, shader->key.ge.mono.u.vs_export_prim_id, @@ -1560,7 +1559,6 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx * .gfx_level = sel->screen->info.gfx_level, .export_clipdist_mask = shader->info.clipdist_mask | shader->info.culldist_mask, .write_pos_to_clipvertex = shader->key.ge.mono.write_pos_to_clipvertex, - .pack_clip_cull_distances = true, .param_offsets = ctx->temp_info.vs_output_param_offset, .has_param_exports = shader->info.nr_param_exports, .disable_streamout = !shader->info.num_streamout_vec4s,