radv: lower indirect derefs after linking

Scratch access isn't very optimizable, so more stores are optimized away
if we lower indirect derefs after both linking and radv_optimize_nir.

fossil-db (navi21):
Totals from 1264 (0.62% of 202427) affected shaders:
Instrs: 1504703 -> 1504708 (+0.00%); split: -0.02%, +0.02%
CodeSize: 8031388 -> 8031020 (-0.00%); split: -0.02%, +0.02%
SpillSGPRs: 1865 -> 1869 (+0.21%)
Latency: 12106362 -> 12106464 (+0.00%); split: -0.01%, +0.01%
InvThroughput: 4056269 -> 4056044 (-0.01%); split: -0.01%, +0.00%
VClause: 13927 -> 13940 (+0.09%)
SClause: 32382 -> 32396 (+0.04%); split: -0.03%, +0.08%
Copies: 188004 -> 187897 (-0.06%); split: -0.17%, +0.11%
Branches: 39045 -> 39052 (+0.02%); split: -0.01%, +0.03%
PreSGPRs: 79885 -> 79814 (-0.09%); split: -0.11%, +0.02%
VALU: 1072639 -> 1072532 (-0.01%); split: -0.01%, +0.00%
SALU: 187317 -> 187375 (+0.03%); split: -0.11%, +0.14%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Acked-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31265>
This commit is contained in:
Rhys Perry 2026-03-06 16:34:24 +00:00 committed by Marge Bot
parent 1943e88d56
commit 91d555c2cb
7 changed files with 71 additions and 16 deletions

View file

@ -218,6 +218,39 @@ ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct a
return ac_nir_unpack_value(b, value, rshift, bitwidth);
}
/* Lower small indirect array derefs to if-else trees. We might want to run
 * this before ac_nir_lower_indirect_derefs() so that small arrays become
 * if-else trees earlier than large arrays become scratch. Scratch access
 * isn't very optimizable, so the scratch lowering should happen as late as
 * possible, while the if-else tree form benefits from further optimization:
 * an indirect access whose candidate elements are all provably equal can be
 * replaced with a direct use of that element, and nir_opt_peephole_select()
 * can flatten parts of the tree. */
bool
ac_nir_lower_indirect_derefs_early(nir_shader *shader)
{
   /* Gather every function-temp variable whose natural size is below the
    * 256-byte threshold; only those are worth turning into if-else trees. */
   struct set small_vars;
   _mesa_pointer_set_init(&small_vars, NULL);

   nir_foreach_function_impl(impl, shader) {
      nir_foreach_function_temp_variable(var, impl) {
         unsigned size, align;
         glsl_get_natural_size_align_bytes(var->type, &size, &align);
         if (size < 256)
            _mesa_set_add(&small_vars, var);
      }
   }

   bool progress = false;
   /* Skip the pass entirely when nothing qualified. */
   if (small_vars.entries)
      NIR_PASS(progress, shader, nir_lower_indirect_var_derefs_to_if_else_trees, &small_vars);

   _mesa_set_fini(&small_vars, NULL);
   return progress;
}
/* This lowers all indirect array derefs to either scratch access or if-else trees, ensuring
 * that none remain.
 */
bool
ac_nir_lower_indirect_derefs(nir_shader *shader)
{

View file

@ -176,6 +176,9 @@ ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,
enum amd_gfx_level gfx_level,
bool triangle_strip_adjacency_fix);
bool
ac_nir_lower_indirect_derefs_early(nir_shader *shader);
bool
ac_nir_lower_indirect_derefs(nir_shader *shader);

View file

@ -106,6 +106,9 @@ radv_compile_cs(const struct radv_compiler_info *compiler_info, struct radv_shad
/* Compile SPIR-V shader to NIR. */
cs_stage->nir = radv_shader_spirv_to_nir(compiler_info, cs_stage, NULL, is_internal);
NIR_PASS(_, cs_stage->nir, ac_nir_lower_indirect_derefs);
NIR_PASS(_, cs_stage->nir, nir_lower_vars_to_ssa);
radv_optimize_nir(cs_stage->nir, cs_stage->key.optimisations_disabled);
/* Run the shader info pass. */

View file

@ -2611,6 +2611,9 @@ radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, st
radv_foreach_stage (i, active_nir_stages) {
int64_t stage_start = os_time_get_nano();
ac_nir_lower_indirect_derefs_early(stages[i].nir);
NIR_PASS(_, stages[i].nir, nir_lower_vars_to_ssa);
radv_optimize_nir(stages[i].nir, stages[i].key.optimisations_disabled);
stages[i].feedback.duration += os_time_get_nano() - stage_start;
@ -2635,6 +2638,28 @@ radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, st
stages[i].feedback.duration += os_time_get_nano() - stage_start;
}
radv_foreach_stage (i, active_nir_stages) {
int64_t stage_start = os_time_get_nano();
/* Indirect lowering must be called after the radv_optimize_nir() loop
* has been called at least once. Otherwise indirect lowering can
* bloat the instruction count of the loop and cause it to be
* considered too large for unrolling.
*
* We want to do this as late as possible because scratch access isn't
* very optimizable. We lower smaller arrays to SSA earlier with
* ac_nir_lower_indirect_derefs_early, because that can actually enable
* optimizations.
*/
bool indirect_derefs_lowered = false;
NIR_PASS(indirect_derefs_lowered, stages[i].nir, ac_nir_lower_indirect_derefs);
NIR_PASS(_, stages[i].nir, nir_lower_vars_to_ssa);
if (indirect_derefs_lowered && !stages[i].key.optimisations_disabled)
radv_optimize_nir(stages[i].nir, false);
stages[i].feedback.duration += os_time_get_nano() - stage_start;
}
radv_fill_shader_info(compiler_info, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages);
radv_declare_pipeline_args(compiler_info, stages, gfx_state, active_nir_stages, debug);

View file

@ -657,6 +657,12 @@ radv_rt_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct radv
{
stage->nir = radv_shader_spirv_to_nir(compiler_info, stage, NULL, false);
bool indirect_derefs_lowered = false;
NIR_PASS(indirect_derefs_lowered, stage->nir, ac_nir_lower_indirect_derefs);
NIR_PASS(_, stage->nir, nir_lower_vars_to_ssa);
if (indirect_derefs_lowered && !stage->key.optimisations_disabled)
radv_optimize_nir(stage->nir, false);
nir_foreach_variable_with_modes (var, stage->nir, nir_var_ray_hit_attrib) {
unsigned size, alignment;
glsl_get_natural_size_align_bytes(var->type, &size, &alignment);

View file

@ -877,21 +877,6 @@ radv_shader_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct
}
}
/* Indirect lowering must be called after the radv_optimize_nir() loop
* has been called at least once. Otherwise indirect lowering can
* bloat the instruction count of the loop and cause it to be
* considered too large for unrolling.
*/
bool indirect_derefs_lowered = false;
NIR_PASS(indirect_derefs_lowered, nir, ac_nir_lower_indirect_derefs);
NIR_PASS(_, nir, nir_lower_vars_to_ssa);
if (indirect_derefs_lowered && !stage->key.optimisations_disabled &&
nir->info.stage != MESA_SHADER_COMPUTE) {
/* Optimize the lowered code before the linking optimizations. */
radv_optimize_nir(nir, false);
}
return nir;
}

View file

@ -250,7 +250,7 @@ nir_lower_indirect_var_derefs_to_if_else_trees(nir_shader *shader,
bool progress = false;
nir_foreach_function_impl(impl, shader) {
progress = lower_indirects_impl(impl, nir_var_uniform, vars, UINT_MAX) ||
progress = lower_indirects_impl(impl, nir_var_all, vars, UINT_MAX) ||
progress;
}