radv: lower lowered io to scalar

Lowered IO was already being scalarized for everything except
fragment shader outputs with epilogs.

If we move the scalarization a bit earlier, we can stop lowering IO
variables to scalar.

Foz-DB Navi48:
Totals from 1001 (0.49% of 205045) affected shaders:
MaxWaves: 31252 -> 31256 (+0.01%)
Instrs: 372258 -> 372036 (-0.06%); split: -0.14%, +0.08%
CodeSize: 1999064 -> 1997836 (-0.06%); split: -0.13%, +0.06%
VGPRs: 39096 -> 39072 (-0.06%)
Latency: 1235558 -> 1235435 (-0.01%); split: -0.08%, +0.07%
InvThroughput: 213845 -> 213875 (+0.01%); split: -0.06%, +0.07%
VClause: 5840 -> 5838 (-0.03%)
SClause: 10964 -> 10969 (+0.05%); split: -0.03%, +0.07%
Copies: 21469 -> 21545 (+0.35%); split: -0.42%, +0.78%
Branches: 5326 -> 5324 (-0.04%)
PreSGPRs: 34214 -> 34206 (-0.02%); split: -0.03%, +0.01%
PreVGPRs: 21931 -> 22001 (+0.32%); split: -0.06%, +0.38%
VALU: 212386 -> 212418 (+0.02%); split: -0.07%, +0.09%
SALU: 50409 -> 50378 (-0.06%); split: -0.07%, +0.01%
VMEM: 8352 -> 8331 (-0.25%)
SMEM: 17966 -> 17963 (-0.02%)

This is mostly register allocation (RA) noise in graphics pipeline library (GPL) fragment shaders.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Acked-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40955>
This commit is contained in:
Georg Lehmann 2026-04-14 16:25:33 +02:00 committed by Marge Bot
parent 1f998b38f4
commit 5d8c817fd7

View file

@ -1556,20 +1556,6 @@ radv_graphics_shaders_link_varyings(struct radv_shader_stage *stages, enum amd_g
/* Update load/store alignments because inter-stage code motion may move instructions used to deduce this info. */
NIR_PASS(_, shader, nir_opt_load_store_update_alignments);
/* Scalarize all I/O, because nir_opt_varyings and nir_opt_vectorize_io expect all I/O to be scalarized. */
nir_variable_mode sca_mode = nir_var_shader_in;
bool sca_progress = false;
if (s != MESA_SHADER_FRAGMENT)
sca_mode |= nir_var_shader_out;
NIR_PASS(sca_progress, shader, nir_lower_io_to_scalar, sca_mode, NULL, NULL);
if (sca_progress) {
/* Eliminate useless vec->mov copies resulting from scalarization. */
NIR_PASS(_, shader, nir_opt_copy_prop);
NIR_PASS(_, shader, nir_opt_constant_folding);
}
}
int highest_changed_producer = -1;
@ -2761,6 +2747,15 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
radv_nir_lower_io(device, stages[i].nir);
if (!stages[i].key.optimisations_disabled) {
/* Scalarize all I/O, because nir_opt_varyings and nir_opt_vectorize_io expect all I/O to be scalarized. */
NIR_PASS(_, stages[i].nir, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_shader_out, NULL, NULL);
/* Eliminate useless vec->mov copies resulting from scalarization. */
NIR_PASS(_, stages[i].nir, nir_opt_copy_prop);
NIR_PASS(_, stages[i].nir, nir_opt_constant_folding);
}
stages[i].feedback.duration += os_time_get_nano() - stage_start;
}
@ -2771,9 +2766,6 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
if (!gfx_state->ps.has_epilog) {
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_remap_color_attachment, gfx_state);
/* Lower FS outputs to scalar to allow dce. */
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports, &gfx_state->ps.epilog);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);