From 91d555c2cbef6e93832d7886f082eebce2669911 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 6 Mar 2026 16:34:24 +0000 Subject: [PATCH] radv: lower indirect derefs after linking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scratch access isn't very optimizable, so more stores are optimized away if we lower indirect derefs after both linking and radv_optimize_nir. fossil-db (navi21): Totals from 1264 (0.62% of 202427) affected shaders: Instrs: 1504703 -> 1504708 (+0.00%); split: -0.02%, +0.02% CodeSize: 8031388 -> 8031020 (-0.00%); split: -0.02%, +0.02% SpillSGPRs: 1865 -> 1869 (+0.21%) Latency: 12106362 -> 12106464 (+0.00%); split: -0.01%, +0.01% InvThroughput: 4056269 -> 4056044 (-0.01%); split: -0.01%, +0.00% VClause: 13927 -> 13940 (+0.09%) SClause: 32382 -> 32396 (+0.04%); split: -0.03%, +0.08% Copies: 188004 -> 187897 (-0.06%); split: -0.17%, +0.11% Branches: 39045 -> 39052 (+0.02%); split: -0.01%, +0.03% PreSGPRs: 79885 -> 79814 (-0.09%); split: -0.11%, +0.02% VALU: 1072639 -> 1072532 (-0.01%); split: -0.01%, +0.00% SALU: 187317 -> 187375 (+0.03%); split: -0.11%, +0.14% Signed-off-by: Rhys Perry Acked-by: Marek Olšák Reviewed-by: Georg Lehmann Part-of: --- src/amd/common/nir/ac_nir.c | 33 +++++++++++++++++++ src/amd/common/nir/ac_nir.h | 3 ++ src/amd/vulkan/radv_pipeline_compute.c | 3 ++ src/amd/vulkan/radv_pipeline_graphics.c | 25 ++++++++++++++ src/amd/vulkan/radv_pipeline_rt.c | 6 ++++ src/amd/vulkan/radv_shader.c | 15 --------- ...r_lower_indirect_derefs_to_if_else_trees.c | 2 +- 7 files changed, 71 insertions(+), 16 deletions(-) diff --git a/src/amd/common/nir/ac_nir.c b/src/amd/common/nir/ac_nir.c index c4606738d80..581c97a4041 100644 --- a/src/amd/common/nir/ac_nir.c +++ b/src/amd/common/nir/ac_nir.c @@ -218,6 +218,39 @@ ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct a return ac_nir_unpack_value(b, value, rshift, bitwidth); } +/* This lowers small indirect array derefs 
to if-else trees. We might want to do this before + * ac_nir_lower_indirect_derefs() to lower small array derefs to if-else trees earlier than + * lowering large array derefs to scratch. This is because we want to do the scratch lowering + * as late as possible (because scratch access isn't very optimizable), but the if-else tree + * lowering can be optimized. For example, an indirect access where we can know that all + * elements that might be accessed are equal could be replaced with a use of that element, or + * nir_opt_peephole_select() can flatten some of the if-else tree. */ +bool +ac_nir_lower_indirect_derefs_early(nir_shader *shader) +{ + struct set vars; + _mesa_pointer_set_init(&vars, NULL); + nir_foreach_function_impl(impl, shader) { + nir_foreach_function_temp_variable(var, impl) { + unsigned var_size, var_align; + glsl_get_natural_size_align_bytes(var->type, &var_size, &var_align); + if (var_size < 256) + _mesa_set_add(&vars, var); + } + } + + bool progress = false; + if (vars.entries) + NIR_PASS(progress, shader, nir_lower_indirect_var_derefs_to_if_else_trees, &vars); + + _mesa_set_fini(&vars, NULL); + + return progress; +} + +/* This lowers all indirect array derefs to either scratch access or if-else trees, ensuring + * that none remains. 
+ */ bool ac_nir_lower_indirect_derefs(nir_shader *shader) { diff --git a/src/amd/common/nir/ac_nir.h b/src/amd/common/nir/ac_nir.h index eb785c3c84a..b7e6c313e2a 100644 --- a/src/amd/common/nir/ac_nir.h +++ b/src/amd/common/nir/ac_nir.h @@ -176,6 +176,9 @@ ac_nir_lower_gs_inputs_to_mem(nir_shader *shader, enum amd_gfx_level gfx_level, bool triangle_strip_adjacency_fix); +bool +ac_nir_lower_indirect_derefs_early(nir_shader *shader); + bool ac_nir_lower_indirect_derefs(nir_shader *shader); diff --git a/src/amd/vulkan/radv_pipeline_compute.c b/src/amd/vulkan/radv_pipeline_compute.c index 93223d811bd..d1b67a0c3e1 100644 --- a/src/amd/vulkan/radv_pipeline_compute.c +++ b/src/amd/vulkan/radv_pipeline_compute.c @@ -106,6 +106,9 @@ radv_compile_cs(const struct radv_compiler_info *compiler_info, struct radv_shad /* Compile SPIR-V shader to NIR. */ cs_stage->nir = radv_shader_spirv_to_nir(compiler_info, cs_stage, NULL, is_internal); + NIR_PASS(_, cs_stage->nir, ac_nir_lower_indirect_derefs); + NIR_PASS(_, cs_stage->nir, nir_lower_vars_to_ssa); + radv_optimize_nir(cs_stage->nir, cs_stage->key.optimisations_disabled); /* Run the shader info pass. 
*/ diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 7a61709e6ff..54ec910224d 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -2611,6 +2611,9 @@ radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, st radv_foreach_stage (i, active_nir_stages) { int64_t stage_start = os_time_get_nano(); + ac_nir_lower_indirect_derefs_early(stages[i].nir); + NIR_PASS(_, stages[i].nir, nir_lower_vars_to_ssa); + radv_optimize_nir(stages[i].nir, stages[i].key.optimisations_disabled); stages[i].feedback.duration += os_time_get_nano() - stage_start; @@ -2635,6 +2638,28 @@ radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, st stages[i].feedback.duration += os_time_get_nano() - stage_start; } + radv_foreach_stage (i, active_nir_stages) { + int64_t stage_start = os_time_get_nano(); + + /* Indirect lowering must be called after the radv_optimize_nir() loop + * has been called at least once. Otherwise indirect lowering can + * bloat the instruction count of the loop and cause it to be + * considered too large for unrolling. + * + * We want to do this as late as possible because scratch access isn't + * very optimizable. We lower smaller arrays to SSA earlier with + * ac_nir_lower_indirect_derefs_early, because that can actually enable + * optimizations. 
+ */ + bool indirect_derefs_lowered = false; + NIR_PASS(indirect_derefs_lowered, stages[i].nir, ac_nir_lower_indirect_derefs); + NIR_PASS(_, stages[i].nir, nir_lower_vars_to_ssa); + if (indirect_derefs_lowered && !stages[i].key.optimisations_disabled) + radv_optimize_nir(stages[i].nir, false); + + stages[i].feedback.duration += os_time_get_nano() - stage_start; + } + radv_fill_shader_info(compiler_info, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages); radv_declare_pipeline_args(compiler_info, stages, gfx_state, active_nir_stages, debug); diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c index d1ab88c6aaf..4a849e688ce 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -657,6 +657,12 @@ radv_rt_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct radv { stage->nir = radv_shader_spirv_to_nir(compiler_info, stage, NULL, false); + bool indirect_derefs_lowered = false; + NIR_PASS(indirect_derefs_lowered, stage->nir, ac_nir_lower_indirect_derefs); + NIR_PASS(_, stage->nir, nir_lower_vars_to_ssa); + if (indirect_derefs_lowered && !stage->key.optimisations_disabled) + radv_optimize_nir(stage->nir, false); + nir_foreach_variable_with_modes (var, stage->nir, nir_var_ray_hit_attrib) { unsigned size, alignment; glsl_get_natural_size_align_bytes(var->type, &size, &alignment); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 707d167862f..533120f9b12 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -877,21 +877,6 @@ radv_shader_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct } } - /* Indirect lowering must be called after the radv_optimize_nir() loop - * has been called at least once. Otherwise indirect lowering can - * bloat the instruction count of the loop and cause it to be - * considered too large for unrolling. 
- */ - bool indirect_derefs_lowered = false; - NIR_PASS(indirect_derefs_lowered, nir, ac_nir_lower_indirect_derefs); - NIR_PASS(_, nir, nir_lower_vars_to_ssa); - - if (indirect_derefs_lowered && !stage->key.optimisations_disabled && - nir->info.stage != MESA_SHADER_COMPUTE) { - /* Optimize the lowered code before the linking optimizations. */ - radv_optimize_nir(nir, false); - } - return nir; } diff --git a/src/compiler/nir/nir_lower_indirect_derefs_to_if_else_trees.c b/src/compiler/nir/nir_lower_indirect_derefs_to_if_else_trees.c index 2f65315c571..57b148fb1e0 100644 --- a/src/compiler/nir/nir_lower_indirect_derefs_to_if_else_trees.c +++ b/src/compiler/nir/nir_lower_indirect_derefs_to_if_else_trees.c @@ -250,7 +250,7 @@ nir_lower_indirect_var_derefs_to_if_else_trees(nir_shader *shader, bool progress = false; nir_foreach_function_impl(impl, shader) { - progress = lower_indirects_impl(impl, nir_var_uniform, vars, UINT_MAX) || + progress = lower_indirects_impl(impl, nir_var_all, vars, UINT_MAX) || progress; }