From cbeccb0a09d4c705a723c83c5e2ab02e6dda9cdc Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Mon, 13 Apr 2026 14:48:51 +0200 Subject: [PATCH] radv: call radv_optimize_nir after lowering io MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are moving more and more passes after lowering io because they no longer operate on variables. There doesn't really seem to be a need to optimize before lowering io, so do it afterwards. Foz-DB Navi48: Totals from 2339 (1.14% of 205045) affected shaders: MaxWaves: 66218 -> 66258 (+0.06%) Instrs: 2009510 -> 2007711 (-0.09%); split: -0.15%, +0.06% CodeSize: 10646476 -> 10648376 (+0.02%); split: -0.05%, +0.07% VGPRs: 131304 -> 131232 (-0.05%) Latency: 19249976 -> 19248715 (-0.01%); split: -0.02%, +0.02% InvThroughput: 3133252 -> 3132291 (-0.03%); split: -0.05%, +0.02% VClause: 32999 -> 33003 (+0.01%); split: -0.07%, +0.08% SClause: 42959 -> 43101 (+0.33%); split: -0.27%, +0.60% Copies: 143721 -> 143792 (+0.05%); split: -0.38%, +0.43% Branches: 38736 -> 38738 (+0.01%) PreSGPRs: 106104 -> 105846 (-0.24%); split: -0.27%, +0.03% PreVGPRs: 95217 -> 95179 (-0.04%); split: -0.04%, +0.00% VALU: 1146620 -> 1144783 (-0.16%); split: -0.22%, +0.06% SALU: 275263 -> 275183 (-0.03%); split: -0.16%, +0.13% VMEM: 58353 -> 58364 (+0.02%) SMEM: 84810 -> 85215 (+0.48%); split: -0.00%, +0.48% Reviewed-by: Marek Olšák Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/nir/radv_nir_lower_io.c | 5 --- src/amd/vulkan/radv_pipeline_graphics.c | 49 ++++++++++++------------- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/src/amd/vulkan/nir/radv_nir_lower_io.c b/src/amd/vulkan/nir/radv_nir_lower_io.c index b44af9aa77f..0f42ca1087b 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_io.c +++ b/src/amd/vulkan/nir/radv_nir_lower_io.c @@ -169,11 +169,6 @@ radv_nir_lower_io(struct radv_device *device, nir_shader *nir) if (nir->info.stage == MESA_SHADER_FRAGMENT) { /* Lower explicit input load intrinsics to sysvals for the layer ID. */ NIR_PASS(_, nir, nir_lower_system_values); - - /* Recompute FS input intrinsic bases to assign a location to each FS input. - * The computed base will match the index of each input in SPI_PS_INPUT_CNTL_n. - */ - radv_recompute_fs_input_bases(nir); } NIR_PASS(_, nir, nir_opt_dce); diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index e076b9e4179..edffc081196 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -2806,38 +2806,34 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac radv_foreach_stage (i, active_nir_stages) { int64_t stage_start = os_time_get_nano(); - radv_optimize_nir(stages[i].nir, stages[i].key.optimisations_disabled); - radv_nir_lower_io(device, stages[i].nir); stages[i].feedback.duration += os_time_get_nano() - stage_start; } if (stages[MESA_SHADER_FRAGMENT].nir) { - bool update_info = false; if (gfx_state->dynamic_line_rast_mode) - NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_poly_line_smooth, - RADV_NUM_SMOOTH_AA_SAMPLES); + NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_poly_line_smooth, RADV_NUM_SMOOTH_AA_SAMPLES); if (!gfx_state->ps.has_epilog) { - NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_remap_color_attachment, gfx_state); + NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_remap_color_attachment, gfx_state); /* Lower FS outputs to scalar to allow dce. */ NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL); - NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports, &gfx_state->ps.epilog); + NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports, &gfx_state->ps.epilog); - NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop); - NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce); - NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dead_cf); + NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop); + NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce); + NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dead_cf); } - NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_input_attachment); + NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_input_attachment); - NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_frag_coord_to_pixel_coord); - if (update_info) - nir_shader_gather_info(stages[MESA_SHADER_FRAGMENT].nir, - nir_shader_get_entrypoint(stages[MESA_SHADER_FRAGMENT].nir)); + NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_cse); + NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop); + NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce); + NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_frag_coord_to_pixel_coord); } radv_foreach_stage (i, active_nir_stages) { @@ -2864,20 +2860,23 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac if (!remove_as_varying && !remove_as_sysval) continue; - bool progress = false; + NIR_PASS(_, stages[i].nir, nir_remove_outputs, MESA_SHADER_FRAGMENT, remove_as_varying, remove_as_sysval); + break; + } - NIR_PASS(progress, stages[i].nir, nir_remove_outputs, MESA_SHADER_FRAGMENT, remove_as_varying, remove_as_sysval); + radv_foreach_stage (i, active_nir_stages) { + int64_t stage_start = os_time_get_nano(); - if (progress) { - /* Remove dead code resulting from removed outputs. */ - do { - progress = false; - NIR_PASS(progress, stages[i].nir, nir_opt_dce); - NIR_PASS(progress, stages[i].nir, nir_opt_dead_cf); - } while (progress); + radv_optimize_nir(stages[i].nir, stages[i].key.optimisations_disabled); + + if (i == MESA_SHADER_FRAGMENT) { + /* Recompute FS input intrinsic bases to assign a location to each FS input. + * The computed base will match the index of each input in SPI_PS_INPUT_CNTL_n. + */ + radv_recompute_fs_input_bases(stages[i].nir); } - break; + stages[i].feedback.duration += os_time_get_nano() - stage_start; } /* Optimize varyings on lowered shader I/O (more efficient than optimizing I/O derefs). */