Merge branch 'nir-move-to-top-control' into 'main'

nir/opt_move_to_top: add an option to exclude moving at_offset/at_sample loads

See merge request mesa/mesa!41167
This commit is contained in:
Marek Olšák 2026-05-08 00:11:56 +00:00
commit a0026241b5
6 changed files with 49 additions and 14 deletions

View file

@ -377,7 +377,7 @@ radv_postprocess_nir(const struct radv_compiler_info *compiler_info, const struc
/* Always load all VS inputs at the top to eliminate needless VMEM->s_wait->VMEM sequences.
* Each s_wait can cost 1000 cycles, so make sure all VS input loads are grouped.
*/
NIR_PASS(_, stage->nir, nir_opt_move_to_top, nir_move_to_top_input_loads);
NIR_PASS(_, stage->nir, nir_opt_move_to_top, nir_move_to_top_input_loads_simple);
NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
NIR_PASS(_, stage->nir, nir_opt_move, sink_opts);
} else {

View file

@ -5892,8 +5892,21 @@ typedef enum {
nir_move_to_entry_block_only = BITFIELD_BIT(0),
/* Instruction options. */
nir_move_to_top_input_loads = BITFIELD_BIT(1),
nir_move_to_top_load_smem_amd = BITFIELD_BIT(2),
/* Simple input loads are non-interpolated loads, plus interpolated loads
* that use pixel, centroid, or sample barycentrics. Interpolated loads
* that use any other barycentrics are excluded.
*/
nir_move_to_top_input_loads_simple = BITFIELD_BIT(1),
/* Interpolated loads with non-trivial barycentrics, such as at_offset and
* at_sample. This option is not recommended for the game Control because
* it moves at_sample loads, together with their complex ALU
* perspective-correct interpolation, out of conditional blocks.
*/
nir_move_to_top_input_loads_complex_baryc = BITFIELD_BIT(2),
nir_move_to_top_load_smem_amd = BITFIELD_BIT(3),
} nir_opt_move_to_top_options;
bool nir_opt_move_to_top(nir_shader *nir, nir_opt_move_to_top_options options);

View file

@ -12,9 +12,9 @@
* of instructions that are moved.
*
* Used either as a scheduling optimization or to accommodate hw or compiler
* backend limitations. You would typically use this if you don't use
* nir_lower_io_vars_to_temporaries and want to move input loads to top,
* but note that such global code motion passes often increase register usage.
* backend limitations. It would typically be used if
* nir_lower_io_vars_to_temporaries isn't used and it's desirable to move input
* loads to top, but such global code motion often increases register usage.
*/
#include "nir.h"
@ -138,10 +138,29 @@ handle_load(nir_builder *b, nir_intrinsic_instr *intr, void *_state)
* an input load. The specific intrinsics that are moved are
* listed in can_move_src_to_top.
*/
move |= state->options & nir_move_to_top_input_loads &&
nir_intrinsic_has_io_semantics(intr) &&
nir_intrinsic_infos[intr->intrinsic].has_dest &&
!nir_is_output_load(intr);
if (state->options & (nir_move_to_top_input_loads_simple |
nir_move_to_top_input_loads_complex_baryc) &&
nir_intrinsic_has_io_semantics(intr) &&
nir_intrinsic_infos[intr->intrinsic].has_dest &&
!nir_is_output_load(intr)) {
if (intr->intrinsic == nir_intrinsic_load_interpolated_input) {
nir_intrinsic_instr *baryc =
nir_def_as_intrinsic_or_null(intr->src[0].ssa);
nir_opt_move_to_top_options baryc_option =
baryc &&
(baryc->intrinsic == nir_intrinsic_load_barycentric_pixel ||
baryc->intrinsic == nir_intrinsic_load_barycentric_centroid ||
baryc->intrinsic == nir_intrinsic_load_barycentric_sample) ?
nir_move_to_top_input_loads_simple :
nir_move_to_top_input_loads_complex_baryc;
move |= !!(state->options & baryc_option);
} else {
move |= !!(state->options & nir_move_to_top_input_loads_simple);
}
}
move |= state->options & nir_move_to_top_load_smem_amd &&
(intr->intrinsic == nir_intrinsic_load_global_amd &&

View file

@ -5604,7 +5604,7 @@ nir_opt_varyings_bulk(nir_shader **shaders, uint32_t num_shaders, bool spirv,
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS(_, nir, nir_opt_move_to_top,
nir_move_to_entry_block_only |
nir_move_to_top_input_loads);
nir_move_to_top_input_loads_simple);
}
/* nir_opt_varyings requires scalar IO. Scalarize all varyings (not just

View file

@ -728,8 +728,11 @@ ir3_finalize_nir(struct ir3_compiler *compiler,
* more optimal at the top.
*/
if (s->info.stage == MESA_SHADER_VERTEX ||
s->info.stage == MESA_SHADER_FRAGMENT)
NIR_PASS(_, s, nir_opt_move_to_top, nir_move_to_top_input_loads);
s->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS(_, s, nir_opt_move_to_top,
nir_move_to_top_input_loads_simple |
nir_move_to_top_input_loads_complex_baryc);
}
if (s->info.stage == MESA_SHADER_GEOMETRY) {
/* nir_unlower_io_to_vars expects constant indirect offsets to be folded

View file

@ -914,7 +914,7 @@ static void si_preprocess_nir(struct si_nir_shader_ctx *ctx)
*/
if (nir->info.stage == MESA_SHADER_VERTEX ||
nir->info.stage == MESA_SHADER_FRAGMENT)
NIR_PASS(progress, nir, nir_opt_move_to_top, nir_move_to_top_input_loads);
NIR_PASS(progress, nir, nir_opt_move_to_top, nir_move_to_top_input_loads_simple);
/* Remove dead temps before we lower indirect indexing. */
NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);