From 5d8c817fd7c4ea46eade007ba54f22c61325186d Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Tue, 14 Apr 2026 16:25:33 +0200 Subject: [PATCH] radv: lower lowered io to scalar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already did this for everything except fragment shader outputs with epilogs. If we move it a bit earlier, we can stop lowering IO variables to scalar. Foz-DB Navi48: Totals from 1001 (0.49% of 205045) affected shaders: MaxWaves: 31252 -> 31256 (+0.01%) Instrs: 372258 -> 372036 (-0.06%); split: -0.14%, +0.08% CodeSize: 1999064 -> 1997836 (-0.06%); split: -0.13%, +0.06% VGPRs: 39096 -> 39072 (-0.06%) Latency: 1235558 -> 1235435 (-0.01%); split: -0.08%, +0.07% InvThroughput: 213845 -> 213875 (+0.01%); split: -0.06%, +0.07% VClause: 5840 -> 5838 (-0.03%) SClause: 10964 -> 10969 (+0.05%); split: -0.03%, +0.07% Copies: 21469 -> 21545 (+0.35%); split: -0.42%, +0.78% Branches: 5326 -> 5324 (-0.04%) PreSGPRs: 34214 -> 34206 (-0.02%); split: -0.03%, +0.01% PreVGPRs: 21931 -> 22001 (+0.32%); split: -0.06%, +0.38% VALU: 212386 -> 212418 (+0.02%); split: -0.07%, +0.09% SALU: 50409 -> 50378 (-0.06%); split: -0.07%, +0.01% VMEM: 8352 -> 8331 (-0.25%) SMEM: 17966 -> 17963 (-0.02%) This is mostly RA noise in GPL FS shaders. Reviewed-by: Marek Olšák Reviewed-by: Samuel Pitoiset Acked-by: Daniel Schürmann Part-of: --- src/amd/vulkan/radv_pipeline_graphics.c | 26 +++++++++---------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 92469f234ae..dc532d3ffc2 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -1556,20 +1556,6 @@ radv_graphics_shaders_link_varyings(struct radv_shader_stage *stages, enum amd_g /* Update load/store alignments because inter-stage code motion may move instructions used to deduce this info. */ NIR_PASS(_, shader, nir_opt_load_store_update_alignments); - - /* Scalarize all I/O, because nir_opt_varyings and nir_opt_vectorize_io expect all I/O to be scalarized. */ - nir_variable_mode sca_mode = nir_var_shader_in; - bool sca_progress = false; - if (s != MESA_SHADER_FRAGMENT) - sca_mode |= nir_var_shader_out; - - NIR_PASS(sca_progress, shader, nir_lower_io_to_scalar, sca_mode, NULL, NULL); - - if (sca_progress) { - /* Eliminate useless vec->mov copies resulting from scalarization. */ - NIR_PASS(_, shader, nir_opt_copy_prop); - NIR_PASS(_, shader, nir_opt_constant_folding); - } } int highest_changed_producer = -1; @@ -2761,6 +2747,15 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac radv_nir_lower_io(device, stages[i].nir); + if (!stages[i].key.optimisations_disabled) { + /* Scalarize all I/O, because nir_opt_varyings and nir_opt_vectorize_io expect all I/O to be scalarized. */ + NIR_PASS(_, stages[i].nir, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_shader_out, NULL, NULL); + + /* Eliminate useless vec->mov copies resulting from scalarization. */ + NIR_PASS(_, stages[i].nir, nir_opt_copy_prop); + NIR_PASS(_, stages[i].nir, nir_opt_constant_folding); + } + stages[i].feedback.duration += os_time_get_nano() - stage_start; } @@ -2771,9 +2766,6 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac if (!gfx_state->ps.has_epilog) { NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_remap_color_attachment, gfx_state); - /* Lower FS outputs to scalar to allow dce. */ - NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL); - NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports, &gfx_state->ps.epilog); NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);