From e4490c4f0e9ed7458517ad9001d17d97d6037a95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Mon, 8 Dec 2025 21:12:26 -0600 Subject: [PATCH 1/8] radv: Lower 64-bit VS inputs to 32-bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Timur Kristóf --- src/amd/vulkan/nir/radv_nir_lower_io.c | 9 +---- src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c | 39 ++++++++++++++++--- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/amd/vulkan/nir/radv_nir_lower_io.c b/src/amd/vulkan/nir/radv_nir_lower_io.c index 665c73b08df..1045041d8fd 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_io.c +++ b/src/amd/vulkan/nir/radv_nir_lower_io.c @@ -149,13 +149,8 @@ radv_nir_lower_io(struct radv_device *device, nir_shader *nir) NIR_PASS(_, nir, nir_lower_tess_level_array_vars_to_vec); } - if (nir->info.stage == MESA_SHADER_VERTEX) { - NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0); - NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32); - } else { - NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, - nir_lower_io_lower_64bit_to_32 | nir_lower_io_use_interpolated_input_intrinsics); - } + NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, + nir_lower_io_lower_64bit_to_32 | nir_lower_io_use_interpolated_input_intrinsics); /* Fold constant offset srcs for IO. 
*/ NIR_PASS(_, nir, nir_opt_constant_folding); diff --git a/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c b/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c index a2f3ec8f7fe..3b56060e752 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c +++ b/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c @@ -184,6 +184,22 @@ adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_ad return alpha; } +static enum pipe_format +adjust_format(const enum pipe_format attrib_format) +{ + if (util_format_get_max_channel_size(attrib_format) <= 32) + return attrib_format; + + const struct util_format_description *f = util_format_description(attrib_format); + + /* 1x 64-bit channel ~ 2x 32-bit channel */ + if (f->nr_channels == 1) + return PIPE_FORMAT_R32G32_UINT; + + /* 2x 64-bit channel ~ 4x 32-bit channel */ + return PIPE_FORMAT_R32G32B32A32_UINT; +} + static nir_def * lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs_state *s) { @@ -192,10 +208,19 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs const nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin); const unsigned base_offset = nir_src_as_uint(*offset_src); - const unsigned location = io_sem.location + base_offset - VERT_ATTRIB_GENERIC0; + const unsigned loc = io_sem.location + base_offset - VERT_ATTRIB_GENERIC0; const unsigned bit_size = intrin->def.bit_size; const unsigned dest_num_components = intrin->def.num_components; + /* Check if the current slot is the high part of a 64-bit input. + * If so, correct the location and remember to add an offset. + */ + const unsigned location = + loc > 0 && (s->gfx_state->vi.attributes_valid & (1 << (loc - 1))) && + util_format_get_max_channel_size(s->gfx_state->vi.vertex_attribute_formats[loc - 1]) == 64 + ? loc - 1 : loc; + const unsigned high_dvec2 = location == loc - 1; + if (!(s->gfx_state->vi.attributes_valid & (1 << location))) { /* Return early for unassigned attribute reads. 
*/ return nir_imm_zero(b, intrin->def.num_components, intrin->def.bit_size); @@ -209,7 +234,7 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs * 64-bit variables must not have a component of 1 or 3. * (See VK spec 15.1.5 "Component Assignment") */ - const unsigned component = nir_intrinsic_component(intrin) / (MAX2(32, bit_size) / 32); + const unsigned component = nir_intrinsic_component(intrin); /* Bitmask of components in bit_size units * of the current input load that are actually used. @@ -225,7 +250,7 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs const uint32_t attrib_binding = s->gfx_state->vi.vertex_attribute_bindings[location]; const uint32_t attrib_offset = s->gfx_state->vi.vertex_attribute_offsets[location]; const uint32_t attrib_stride = s->gfx_state->vi.vertex_attribute_strides[location]; - const enum pipe_format attrib_format = s->gfx_state->vi.vertex_attribute_formats[location]; + const enum pipe_format attrib_format = adjust_format(s->gfx_state->vi.vertex_attribute_formats[location]); const struct util_format_description *f = util_format_description(attrib_format); const struct ac_vtx_format_info *vtx_info = ac_get_vtx_format_info(s->gpu_info->gfx_level, s->gpu_info->family, attrib_format); @@ -255,14 +280,16 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs * Beneficial because the backend may be able to emit fewer HW instructions. * Only possible with array formats. */ - const unsigned first_used_channel = first_used_swizzled_channel(f, dest_use_mask, false); + const unsigned first_used_channel = + needs_swizzle ? first_used_swizzled_channel(f, dest_use_mask, false) : (ffs(dest_use_mask) - 1); const unsigned skipped_start = f->is_array ? first_used_channel : 0; /* Number of channels we actually use and load. * Don't shrink the format here because this might allow the backend to * emit fewer (but larger than needed) HW instructions. 
*/ - const unsigned first_trailing_unused_channel = first_used_swizzled_channel(f, dest_use_mask, true) + 1; + const unsigned first_trailing_unused_channel = + needs_swizzle ? (first_used_swizzled_channel(f, dest_use_mask, true) + 1) : util_last_bit(dest_use_mask); const unsigned max_loaded_channels = MIN2(first_trailing_unused_channel, f->nr_channels); const unsigned fetch_num_channels = first_used_channel >= max_loaded_channels ? 0 : max_loaded_channels - skipped_start; @@ -287,7 +314,7 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs nir_def *index = base_index; /* Add excess constant offset to the index. */ - unsigned const_off = attrib_offset + count_format_bytes(f, 0, start); + unsigned const_off = attrib_offset + high_dvec2 * 16 + count_format_bytes(f, 0, start); if (attrib_stride && const_off >= attrib_stride) { index = nir_iadd_imm(b, base_index, const_off / attrib_stride); const_off %= attrib_stride; From 19ac9f397f723b8790cd19b0d8222255f8064591 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Mon, 8 Dec 2025 12:36:36 -0600 Subject: [PATCH 2/8] radv: Scalarize and re-vectorize unlinked shader I/O MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reasons to do this: - Optimize VS inputs (always unlinked) - Allow some optimization on unlinked shaders for GPL/ESO - Prepare for retiring the old linking passes Fossil DB stats on Strix Halo (GFX11.5): Totals from 1772 (2.22% of 79825) affected shaders: MaxWaves: 49934 -> 50128 (+0.39%) Instrs: 1220418 -> 1219664 (-0.06%); split: -0.20%, +0.14% CodeSize: 6154972 -> 6152224 (-0.04%); split: -0.17%, +0.13% VGPRs: 91368 -> 91044 (-0.35%); split: -0.43%, +0.08% Latency: 6021684 -> 6019445 (-0.04%); split: -0.26%, +0.22% InvThroughput: 853326 -> 853323 (-0.00%); split: -0.10%, +0.10% VClause: 21636 -> 21674 (+0.18%); split: -0.15%, +0.33% SClause: 31251 -> 31162 (-0.28%); split: -0.46%, +0.18% Copies: 72778 -> 72833 
(+0.08%); split: -0.82%, +0.89% Branches: 20198 -> 20174 (-0.12%) PreSGPRs: 76428 -> 76459 (+0.04%) PreVGPRs: 66813 -> 65696 (-1.67%); split: -1.81%, +0.14% VALU: 669782 -> 669961 (+0.03%); split: -0.09%, +0.11% SALU: 207998 -> 208056 (+0.03%); split: -0.05%, +0.08% VMEM: 34111 -> 34089 (-0.06%) Signed-off-by: Timur Kristóf Reviewed-by: Samuel Pitoiset Reviewed-by: Georg Lehmann --- src/amd/vulkan/radv_pipeline_graphics.c | 41 +++++++++++++++---------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 88b70a1046e..2db5ebb8e33 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -1703,25 +1703,26 @@ radv_graphics_shaders_link_varyings(struct radv_shader_stage *stages, enum amd_g /* Prepare shaders before running nir_opt_varyings. */ for (int i = 0; i < ARRAY_SIZE(graphics_shader_order); ++i) { const mesa_shader_stage s = graphics_shader_order[i]; - const mesa_shader_stage next = stages[s].info.next_stage; - if (!stages[s].nir || next == MESA_SHADER_NONE || !stages[next].nir) + if (!stages[s].nir) continue; - if (stages[s].key.optimisations_disabled || stages[next].key.optimisations_disabled) + if (stages[s].key.optimisations_disabled) continue; nir_shader *producer = stages[s].nir; - nir_shader *consumer = stages[next].nir; /* It is expected by nir_opt_varyings that no undefined stores are present in the shader. */ NIR_PASS(_, producer, nir_opt_undef); /* Update load/store alignments because inter-stage code motion may move instructions used to deduce this info. */ - NIR_PASS(_, consumer, nir_opt_load_store_update_alignments); + NIR_PASS(_, producer, nir_opt_load_store_update_alignments); /* Scalarize all I/O, because nir_opt_varyings and nir_opt_vectorize_io expect all I/O to be scalarized. 
*/ - NIR_PASS(_, producer, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL); - NIR_PASS(_, consumer, nir_lower_io_to_scalar, nir_var_shader_in, NULL, NULL); + nir_variable_mode sca_mode = nir_var_shader_in; + if (s != MESA_SHADER_FRAGMENT) + sca_mode |= nir_var_shader_out; + + NIR_PASS(_, producer, nir_lower_io_to_scalar, sca_mode, NULL, NULL); /* Eliminate useless vec->mov copies resulting from scalarization. */ NIR_PASS(_, producer, nir_opt_copy_prop); @@ -1787,22 +1788,28 @@ radv_graphics_shaders_link_varyings(struct radv_shader_stage *stages, enum amd_g /* Run optimizations and fixups after linking. */ for (int i = 0; i < ARRAY_SIZE(graphics_shader_order); ++i) { const mesa_shader_stage s = graphics_shader_order[i]; - const mesa_shader_stage next = stages[s].info.next_stage; if (!stages[s].nir) continue; nir_shader *producer = stages[s].nir; - /* Re-vectorize I/O for stages that output to memory (LDS or VRAM). - * Don't vectorize FS inputs, doing so just regresses shader stats without any benefit. - * There is also no benefit from re-vectorizing the outputs of the last pre-rasterization - * stage here, because ac_nir_lower_ngg/legacy already takes care of that. + /* Re-vectorize I/O for stages that use memory for I/O (LDS or VRAM). + * Don't vectorize FS I/O, doing so just regresses shader stats without any benefit. */ - if (next != MESA_SHADER_NONE && stages[next].nir && next != MESA_SHADER_FRAGMENT && - !stages[s].key.optimisations_disabled && !stages[next].key.optimisations_disabled) { - nir_shader *consumer = stages[next].nir; - NIR_PASS(_, producer, nir_opt_vectorize_io, nir_var_shader_out, false); - NIR_PASS(_, consumer, nir_opt_vectorize_io, nir_var_shader_in, false); + if (s != MESA_SHADER_FRAGMENT && !stages[s].key.optimisations_disabled) { + /* Delete dead instructions to prevent them from being vectorized. */ + NIR_PASS(_, producer, nir_opt_dce); + + /* Vectorize all inputs. Non-FS inputs are always read from memory. 
*/ + nir_variable_mode vec_mode = nir_var_shader_in; + + /* There is also no benefit from re-vectorizing the outputs of the last pre-rasterization + * stage here, because ac_nir_lower_ngg/legacy already takes care of that. + */ + if (!radv_is_last_vgt_stage(&stages[s])) + vec_mode |= nir_var_shader_out; + + NIR_PASS(_, producer, nir_opt_vectorize_io, vec_mode, true); } /* Gather shader info; at least the I/O info likely changed From a13340aa83607d27825501bb42d567f2cffa11a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Mon, 8 Dec 2025 12:42:50 -0600 Subject: [PATCH 3/8] radv: Only run some optimizations when scalarization made progress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These passes are called to clean up after scalarization, so only call them when scalarization actually made progress. No Fossil DB changes on Strix Halo (GFX11.5) Signed-off-by: Timur Kristóf Reviewed-by: Samuel Pitoiset Reviewed-by: Georg Lehmann --- src/amd/vulkan/radv_pipeline_graphics.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 2db5ebb8e33..5ce73c8cc55 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -1719,14 +1719,17 @@ radv_graphics_shaders_link_varyings(struct radv_shader_stage *stages, enum amd_g /* Scalarize all I/O, because nir_opt_varyings and nir_opt_vectorize_io expect all I/O to be scalarized. */ nir_variable_mode sca_mode = nir_var_shader_in; + bool sca_progress; if (s != MESA_SHADER_FRAGMENT) sca_mode |= nir_var_shader_out; - NIR_PASS(_, producer, nir_lower_io_to_scalar, sca_mode, NULL, NULL); + NIR_PASS(sca_progress, producer, nir_lower_io_to_scalar, sca_mode, NULL, NULL); - /* Eliminate useless vec->mov copies resulting from scalarization. 
*/ - NIR_PASS(_, producer, nir_opt_copy_prop); - NIR_PASS(_, producer, nir_opt_constant_folding); + if (sca_progress) { + /* Eliminate useless vec->mov copies resulting from scalarization. */ + NIR_PASS(_, producer, nir_opt_copy_prop); + NIR_PASS(_, producer, nir_opt_constant_folding); + } } int highest_changed_producer = -1; From be2f0d5b3e5799ead8e7653610a519b35495db07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Mon, 8 Dec 2025 12:48:21 -0600 Subject: [PATCH 4/8] radv: Don't call nir_opt_combine_stores anymore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also, there is no need to call nir_lower_tess_level_array_vars_to_vec here anymore. These should now be handled by nir_opt_vectorize_io. Fossil DB stats on Strix Halo (GFX11.5): Totals from 385 (0.48% of 79825) affected shaders: Instrs: 386363 -> 385212 (-0.30%); split: -0.30%, +0.01% CodeSize: 1910824 -> 1906236 (-0.24%); split: -0.25%, +0.01% Latency: 1019699 -> 1018973 (-0.07%); split: -0.08%, +0.01% InvThroughput: 158146 -> 156881 (-0.80%); split: -0.81%, +0.01% Copies: 22684 -> 21129 (-6.86%); split: -6.88%, +0.03% VALU: 242937 -> 241382 (-0.64%); split: -0.64%, +0.00% Signed-off-by: Timur Kristóf Reviewed-by: Samuel Pitoiset Reviewed-by: Georg Lehmann --- src/amd/vulkan/radv_pipeline_graphics.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 5ce73c8cc55..c7418dae0a0 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -1374,11 +1374,6 @@ radv_link_shaders(const struct radv_device *device, struct radv_shader_stage *pr (producer->info.stage == MESA_SHADER_VERTEX && has_geom_or_tess) || (producer->info.stage == MESA_SHADER_TESS_EVAL && merged_gs)) { NIR_PASS(_, producer, nir_opt_vectorize_io_vars, nir_var_shader_out); - - if (producer->info.stage == MESA_SHADER_TESS_CTRL) - NIR_PASS(_, producer, 
nir_lower_tess_level_array_vars_to_vec); - - NIR_PASS(_, producer, nir_opt_combine_stores, nir_var_shader_out); } if (consumer->info.stage == MESA_SHADER_GEOMETRY || consumer->info.stage == MESA_SHADER_TESS_CTRL || From 78469c46fabe2cdb2784b2b211a315ed43e666c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Mon, 8 Dec 2025 13:06:12 -0600 Subject: [PATCH 5/8] radv: Don't call nir_compact_varyings anymore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nir_compact_varyings is not necessary anymore, because everything that it does, is also done by nir_opt_varyings. The resulting shader stats are slightly negative because without nir_compact_varyings, the I/O variables in TCS are sorted less "fortunately". After discussing this with the RADV team, we decided that this is an acceptable loss. Fossil DB stats on Strix Halo (GFX11.5): Totals from 4581 (5.74% of 79825) affected shaders: MaxWaves: 130518 -> 130594 (+0.06%); split: +0.06%, -0.00% Instrs: 3036142 -> 3038104 (+0.06%); split: -0.06%, +0.13% CodeSize: 15577024 -> 15583772 (+0.04%); split: -0.06%, +0.10% VGPRs: 228444 -> 228300 (-0.06%); split: -0.14%, +0.07% Latency: 13923113 -> 13927664 (+0.03%); split: -0.03%, +0.07% InvThroughput: 1952386 -> 1954383 (+0.10%); split: -0.08%, +0.18% VClause: 43367 -> 43357 (-0.02%); split: -0.11%, +0.09% SClause: 62081 -> 62090 (+0.01%); split: -0.01%, +0.02% Copies: 178828 -> 181091 (+1.27%); split: -0.87%, +2.13% PreSGPRs: 195953 -> 196018 (+0.03%) PreVGPRs: 162364 -> 162286 (-0.05%) VALU: 1839993 -> 1842215 (+0.12%); split: -0.08%, +0.20% SALU: 340789 -> 340776 (-0.00%); split: -0.01%, +0.00% VMEM: 100026 -> 100139 (+0.11%); split: -0.00%, +0.11% Signed-off-by: Timur Kristóf Reviewed-by: Samuel Pitoiset Reviewed-by: Georg Lehmann --- src/amd/vulkan/radv_pipeline_graphics.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c 
index c7418dae0a0..8d5f59cc890 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -1356,16 +1356,6 @@ radv_link_shaders(const struct radv_device *device, struct radv_shader_stage *pr nir_remove_unused_varyings(producer, consumer); - nir_compact_varyings(producer, consumer, true); - - nir_validate_shader(producer, "after nir_compact_varyings"); - nir_validate_shader(consumer, "after nir_compact_varyings"); - - if (producer->info.stage == MESA_SHADER_MESH) { - /* nir_compact_varyings can change the location of per-vertex and per-primitive outputs */ - nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer)); - } - const bool has_geom_or_tess = consumer->info.stage == MESA_SHADER_GEOMETRY || consumer->info.stage == MESA_SHADER_TESS_CTRL; const bool merged_gs = consumer->info.stage == MESA_SHADER_GEOMETRY && gfx_level >= GFX9; From 3d3e87109137299833af0da7f7789aef3548e698 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Mon, 8 Dec 2025 13:09:12 -0600 Subject: [PATCH 6/8] radv: Don't call nir_remove_unused_varyings anymore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nir_remove_unused_varyings pass is not necessary anymore, because nir_opt_varyings already does the same. 
Fossil DB stats on Strix Halo (GFX11.5): Totals from 3096 (3.88% of 79825) affected shaders: MaxWaves: 91466 -> 91470 (+0.00%) Instrs: 1359248 -> 1357129 (-0.16%); split: -0.39%, +0.24% CodeSize: 6736652 -> 6730116 (-0.10%); split: -0.37%, +0.27% VGPRs: 141372 -> 141348 (-0.02%) Latency: 5719906 -> 5712893 (-0.12%); split: -0.21%, +0.09% InvThroughput: 758400 -> 760565 (+0.29%); split: -0.18%, +0.46% VClause: 24838 -> 24821 (-0.07%); split: -0.15%, +0.08% SClause: 30638 -> 29971 (-2.18%); split: -2.25%, +0.07% Copies: 101046 -> 103098 (+2.03%); split: -1.31%, +3.34% Branches: 27897 -> 28051 (+0.55%) PreSGPRs: 120136 -> 120154 (+0.01%); split: -0.67%, +0.69% PreVGPRs: 97455 -> 97476 (+0.02%); split: -0.00%, +0.02% VALU: 758045 -> 760188 (+0.28%); split: -0.14%, +0.42% SALU: 196083 -> 194274 (-0.92%); split: -0.97%, +0.05% VMEM: 55104 -> 55077 (-0.05%); split: -0.05%, +0.00% SMEM: 57261 -> 55825 (-2.51%); split: -2.56%, +0.05% Signed-off-by: Timur Kristóf Reviewed-by: Samuel Pitoiset Reviewed-by: Georg Lehmann --- src/amd/vulkan/radv_pipeline_graphics.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 8d5f59cc890..970bb6ea0f5 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -1354,8 +1354,6 @@ radv_link_shaders(const struct radv_device *device, struct radv_shader_stage *pr NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL); NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); - nir_remove_unused_varyings(producer, consumer); - const bool has_geom_or_tess = consumer->info.stage == MESA_SHADER_GEOMETRY || consumer->info.stage == MESA_SHADER_TESS_CTRL; const bool merged_gs = consumer->info.stage == MESA_SHADER_GEOMETRY && gfx_level >= GFX9; From a179b200f7bf15a0e8f43bdff0bde4ace3ab8d2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Mon, 8 Dec 2025 13:11:33 -0600 
Subject: [PATCH 7/8] radv: Don't call nir_remove_dead_variables during linking anymore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Already done elsewhere. No Fossil DB changes on Strix Halo (GFX11.5). Signed-off-by: Timur Kristóf Reviewed-by: Samuel Pitoiset Reviewed-by: Georg Lehmann --- src/amd/vulkan/radv_pipeline_graphics.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 970bb6ea0f5..57ffa5e6b50 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -1351,9 +1351,6 @@ radv_link_shaders(const struct radv_device *device, struct radv_shader_stage *pr NIR_PASS(_, consumer, nir_opt_dce); } - NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL); - NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); - const bool has_geom_or_tess = consumer->info.stage == MESA_SHADER_GEOMETRY || consumer->info.stage == MESA_SHADER_TESS_CTRL; const bool merged_gs = consumer->info.stage == MESA_SHADER_GEOMETRY && gfx_level >= GFX9; From 7eb459b8ce9ef0775ec770cfc3e3c6af88e84a28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Mon, 8 Dec 2025 13:16:51 -0600 Subject: [PATCH 8/8] radv: Don't call nir_link_opt_varyings anymore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old nir_link_opt_varyings pass is superseded by the new nir_opt_varyings pass. 
Fossil DB stats on Strix Halo (GFX11.5): Totals from 1293 (1.62% of 79825) affected shaders: MaxWaves: 37136 -> 37148 (+0.03%) Instrs: 989753 -> 989954 (+0.02%); split: -0.07%, +0.09% CodeSize: 5162192 -> 5162744 (+0.01%); split: -0.05%, +0.06% VGPRs: 68304 -> 68400 (+0.14%); split: -0.16%, +0.30% Latency: 7935046 -> 7934604 (-0.01%); split: -0.02%, +0.02% InvThroughput: 1292238 -> 1292393 (+0.01%); split: -0.04%, +0.05% VClause: 16647 -> 16639 (-0.05%); split: -0.06%, +0.01% SClause: 27474 -> 27491 (+0.06%); split: -0.08%, +0.14% Copies: 68827 -> 69131 (+0.44%); split: -0.50%, +0.94% Branches: 19487 -> 19505 (+0.09%) PreSGPRs: 55883 -> 55908 (+0.04%); split: -0.01%, +0.05% PreVGPRs: 47987 -> 48126 (+0.29%); split: -0.00%, +0.29% VALU: 574149 -> 574426 (+0.05%); split: -0.10%, +0.14% SALU: 141793 -> 141738 (-0.04%); split: -0.10%, +0.06% Signed-off-by: Timur Kristóf Reviewed-by: Samuel Pitoiset Reviewed-by: Georg Lehmann --- src/amd/vulkan/radv_pipeline_graphics.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 57ffa5e6b50..6c37d5fde89 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -1342,15 +1342,6 @@ radv_link_shaders(const struct radv_device *device, struct radv_shader_stage *pr if (gfx_state->enable_remove_point_size) radv_remove_point_size(gfx_state, producer, consumer); - if (nir_link_opt_varyings(producer, consumer)) { - nir_validate_shader(producer, "after nir_link_opt_varyings"); - nir_validate_shader(consumer, "after nir_link_opt_varyings"); - - NIR_PASS(_, consumer, nir_opt_constant_folding); - NIR_PASS(_, consumer, nir_opt_algebraic); - NIR_PASS(_, consumer, nir_opt_dce); - } - const bool has_geom_or_tess = consumer->info.stage == MESA_SHADER_GEOMETRY || consumer->info.stage == MESA_SHADER_TESS_CTRL; const bool merged_gs = consumer->info.stage == MESA_SHADER_GEOMETRY && gfx_level >= GFX9;