From c63c6951494b598c66c4c718cb8d75ff73930cbb Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 29 Aug 2025 16:14:17 +0100 Subject: [PATCH] radv: move nir_opt_algebraic loop for NGG culling earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Totals from 6913 (8.66% of 79825) affected shaders: (Navi21) Instrs: 5373319 -> 5358717 (-0.27%); split: -0.30%, +0.03% CodeSize: 27448536 -> 27345464 (-0.38%); split: -0.41%, +0.03% SpillSGPRs: 982 -> 998 (+1.63%) Latency: 22998827 -> 23011602 (+0.06%); split: -0.13%, +0.19% InvThroughput: 4663749 -> 4664809 (+0.02%); split: -0.00%, +0.03% VClause: 120845 -> 120461 (-0.32%); split: -0.49%, +0.17% SClause: 119068 -> 116064 (-2.52%); split: -2.71%, +0.18% Copies: 456590 -> 456450 (-0.03%); split: -0.19%, +0.16% Branches: 145555 -> 145559 (+0.00%); split: -0.00%, +0.01% PreSGPRs: 300465 -> 301154 (+0.23%); split: -0.01%, +0.24% VALU: 3064127 -> 3064210 (+0.00%); split: -0.00%, +0.00% SALU: 891257 -> 886368 (-0.55%); split: -0.71%, +0.16% SMEM: 190500 -> 184624 (-3.08%); split: -3.11%, +0.02% Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/vulkan/radv_pipeline.c | 11 +++++++-- src/amd/vulkan/radv_shader.c | 44 +++++++++++++++++++++------------- src/amd/vulkan/radv_shader.h | 2 ++ 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 884eff95f8d..204848f6e1d 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -433,6 +433,15 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat if (constant_fold_for_push_const && stage->args.ac.inline_push_const_mask) NIR_PASS(_, stage->nir, nir_opt_constant_folding); + /* Optimize NIR before NGG culling */ + bool is_last_vgt_stage = radv_is_last_vgt_stage(stage); + bool lowered_ngg = stage->info.is_ngg && is_last_vgt_stage; + if (lowered_ngg && stage->nir->info.stage != MESA_SHADER_GEOMETRY && stage->info.has_ngg_culling) + radv_optimize_nir_algebraic_early(stage->nir); + + /* This has to be done after nir_opt_algebraic for best descriptor vectorization, but also before + * NGG culling. + */ NIR_PASS(_, stage->nir, radv_nir_apply_pipeline_layout, device, stage); NIR_PASS(_, stage->nir, nir_lower_alu_width, opt_vectorize_callback, device); @@ -466,9 +475,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat } /* Lower I/O intrinsics to memory instructions. */ - bool is_last_vgt_stage = radv_is_last_vgt_stage(stage); bool io_to_mem = radv_nir_lower_io_to_mem(device, stage); - bool lowered_ngg = stage->info.is_ngg && is_last_vgt_stage; if (lowered_ngg) { radv_lower_ngg(device, stage, gfx_state); } else if (is_last_vgt_stage) { diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index a702997fb26..4c7373c0266 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -234,7 +234,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively) } void -radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, enum amd_gfx_level gfx_level) +radv_optimize_nir_algebraic_early(nir_shader *nir) { bool more_algebraic = true; while (more_algebraic) { @@ -258,21 +258,11 @@ radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, e NIR_PASS(_, nir, nir_opt_remove_phis); NIR_PASS(_, nir, nir_opt_dead_cf); } +} - if (opt_offsets) { - const nir_opt_offsets_options offset_options = { - .uniform_max = 0, - .buffer_max = ~0, - .shared_max = UINT16_MAX, - .shared_atomic_max = UINT16_MAX, - .allow_offset_wrap_cb = ac_nir_allow_offset_wrap_cb, - .cb_data = &gfx_level, - }; - NIR_PASS(_, nir, nir_opt_offsets, &offset_options); - } - if (opt_mqsad) - NIR_PASS(_, nir, nir_opt_mqsad); - +void +radv_optimize_nir_algebraic_late(nir_shader *nir) +{ /* Do late algebraic optimization to turn add(a, * neg(b)) back into subs, then the mandatory cleanup * after algebraic. Note that it may produce fnegs, @@ -292,6 +282,28 @@ radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, e _mesa_set_destroy(skip, NULL); } +void +radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, enum amd_gfx_level gfx_level) +{ + radv_optimize_nir_algebraic_early(nir); + + if (opt_offsets) { + const nir_opt_offsets_options offset_options = { + .uniform_max = 0, + .buffer_max = ~0, + .shared_max = UINT16_MAX, + .shared_atomic_max = UINT16_MAX, + .allow_offset_wrap_cb = ac_nir_allow_offset_wrap_cb, + .cb_data = &gfx_level, + }; + NIR_PASS(_, nir, nir_opt_offsets, &offset_options); + } + if (opt_mqsad) + NIR_PASS(_, nir, nir_opt_mqsad); + + radv_optimize_nir_algebraic_late(nir); +} + static void shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align) { @@ -913,7 +925,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage, assert(info->is_ngg); if (info->has_ngg_culling) - radv_optimize_nir_algebraic(nir, false, false, pdev->info.gfx_level); + radv_optimize_nir_algebraic_late(nir); options.num_vertices_per_primitive = num_vertices_per_prim; options.early_prim_export = info->has_ngg_early_prim_export; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index dacc6d9cc4d..c25a3101f04 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -487,6 +487,8 @@ struct radv_shader_dma_submission { struct radv_shader_stage; void radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively); +void radv_optimize_nir_algebraic_early(nir_shader *shader); +void radv_optimize_nir_algebraic_late(nir_shader *shader); void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets, bool opt_mqsad, enum amd_gfx_level gfx_level);