radv: call radv_optimize_nir after lowering io

We are moving more and more passes to run after I/O lowering,
because they no longer operate on variables.

There does not seem to be any need to run radv_optimize_nir before
lowering I/O, so call it afterwards instead.

Foz-DB Navi48:
Totals from 2339 (1.14% of 205045) affected shaders:
MaxWaves: 66218 -> 66258 (+0.06%)
Instrs: 2009510 -> 2007711 (-0.09%); split: -0.15%, +0.06%
CodeSize: 10646476 -> 10648376 (+0.02%); split: -0.05%, +0.07%
VGPRs: 131304 -> 131232 (-0.05%)
Latency: 19249976 -> 19248715 (-0.01%); split: -0.02%, +0.02%
InvThroughput: 3133252 -> 3132291 (-0.03%); split: -0.05%, +0.02%
VClause: 32999 -> 33003 (+0.01%); split: -0.07%, +0.08%
SClause: 42959 -> 43101 (+0.33%); split: -0.27%, +0.60%
Copies: 143721 -> 143792 (+0.05%); split: -0.38%, +0.43%
Branches: 38736 -> 38738 (+0.01%)
PreSGPRs: 106104 -> 105846 (-0.24%); split: -0.27%, +0.03%
PreVGPRs: 95217 -> 95179 (-0.04%); split: -0.04%, +0.00%
VALU: 1146620 -> 1144783 (-0.16%); split: -0.22%, +0.06%
SALU: 275263 -> 275183 (-0.03%); split: -0.16%, +0.13%
VMEM: 58353 -> 58364 (+0.02%)
SMEM: 84810 -> 85215 (+0.48%); split: -0.00%, +0.48%

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40928>
This commit is contained in:
Georg Lehmann 2026-04-13 14:48:51 +02:00 committed by Marge Bot
parent 4598bbaea7
commit cbeccb0a09
2 changed files with 24 additions and 30 deletions

View file

@ -169,11 +169,6 @@ radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
/* Lower explicit input load intrinsics to sysvals for the layer ID. */
NIR_PASS(_, nir, nir_lower_system_values);
/* Recompute FS input intrinsic bases to assign a location to each FS input.
* The computed base will match the index of each input in SPI_PS_INPUT_CNTL_n.
*/
radv_recompute_fs_input_bases(nir);
}
NIR_PASS(_, nir, nir_opt_dce);

View file

@ -2806,38 +2806,34 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
radv_foreach_stage (i, active_nir_stages) {
int64_t stage_start = os_time_get_nano();
radv_optimize_nir(stages[i].nir, stages[i].key.optimisations_disabled);
radv_nir_lower_io(device, stages[i].nir);
stages[i].feedback.duration += os_time_get_nano() - stage_start;
}
if (stages[MESA_SHADER_FRAGMENT].nir) {
bool update_info = false;
if (gfx_state->dynamic_line_rast_mode)
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_poly_line_smooth,
RADV_NUM_SMOOTH_AA_SAMPLES);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_poly_line_smooth, RADV_NUM_SMOOTH_AA_SAMPLES);
if (!gfx_state->ps.has_epilog) {
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_remap_color_attachment, gfx_state);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_remap_color_attachment, gfx_state);
/* Lower FS outputs to scalar to allow dce. */
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports, &gfx_state->ps.epilog);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports, &gfx_state->ps.epilog);
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce);
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dead_cf);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dead_cf);
}
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_input_attachment);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_input_attachment);
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_frag_coord_to_pixel_coord);
if (update_info)
nir_shader_gather_info(stages[MESA_SHADER_FRAGMENT].nir,
nir_shader_get_entrypoint(stages[MESA_SHADER_FRAGMENT].nir));
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_cse);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_frag_coord_to_pixel_coord);
}
radv_foreach_stage (i, active_nir_stages) {
@ -2864,20 +2860,23 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
if (!remove_as_varying && !remove_as_sysval)
continue;
bool progress = false;
NIR_PASS(_, stages[i].nir, nir_remove_outputs, MESA_SHADER_FRAGMENT, remove_as_varying, remove_as_sysval);
break;
}
NIR_PASS(progress, stages[i].nir, nir_remove_outputs, MESA_SHADER_FRAGMENT, remove_as_varying, remove_as_sysval);
radv_foreach_stage (i, active_nir_stages) {
int64_t stage_start = os_time_get_nano();
if (progress) {
/* Remove dead code resulting from removed outputs. */
do {
progress = false;
NIR_PASS(progress, stages[i].nir, nir_opt_dce);
NIR_PASS(progress, stages[i].nir, nir_opt_dead_cf);
} while (progress);
radv_optimize_nir(stages[i].nir, stages[i].key.optimisations_disabled);
if (i == MESA_SHADER_FRAGMENT) {
/* Recompute FS input intrinsic bases to assign a location to each FS input.
* The computed base will match the index of each input in SPI_PS_INPUT_CNTL_n.
*/
radv_recompute_fs_input_bases(stages[i].nir);
}
break;
stages[i].feedback.duration += os_time_get_nano() - stage_start;
}
/* Optimize varyings on lowered shader I/O (more efficient than optimizing I/O derefs). */