mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 21:50:12 +01:00
nir: add new pass nir_opt_move_to_top
This can be used to move input loads to the top once we stop using nir_lower_io_vars_to_temporaries, which does that unconditionally. It's more flexible than what nir_lower_io_vars_to_temporaries was doing, and can be extended to handle any instructions.

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36018>
This commit is contained in:
parent
3dd9a9782b
commit
a4e522f8b0
3 changed files with 186 additions and 0 deletions
|
|
@ -278,6 +278,7 @@ else
|
|||
'nir_opt_memcpy.c',
|
||||
'nir_opt_move.c',
|
||||
'nir_opt_move_discards_to_top.c',
|
||||
'nir_opt_move_to_top.c',
|
||||
'nir_opt_mqsad.c',
|
||||
'nir_opt_non_uniform_access.c',
|
||||
'nir_opt_offsets.c',
|
||||
|
|
|
|||
|
|
@ -5268,6 +5268,31 @@ bool nir_clear_shared_memory(nir_shader *shader,
|
|||
const unsigned shared_size,
|
||||
const unsigned chunk_size);
|
||||
|
||||
typedef enum {
|
||||
/* If the instructions to move are in the function entry block, do nothing,
|
||||
* else move them at the end (not the beginning) of the entry block.
|
||||
*
|
||||
* If this is not set, all selected instructions are always moved
|
||||
* to the beginning of the entry block.
|
||||
*
|
||||
* This has the following advantages:
|
||||
* - not moving all the way to the beginning reduces register usage within
|
||||
* the entry block
|
||||
* - CSE within the entry block is still maximally effective
|
||||
* (nir_opt_varyings recommends that each input component is loaded only
|
||||
* once, and this option + CSE guarantees that)
|
||||
* - the pass does nothing if all affected instructions are already
|
||||
* in the entry block.
|
||||
*/
|
||||
nir_move_to_entry_block_only = BITFIELD_BIT(0),
|
||||
|
||||
/* Instruction options. */
|
||||
nir_move_to_top_input_loads = BITFIELD_BIT(1),
|
||||
nir_move_to_top_load_smem_amd = BITFIELD_BIT(2),
|
||||
} nir_opt_move_to_top_options;
|
||||
|
||||
bool nir_opt_move_to_top(nir_shader *nir, nir_opt_move_to_top_options options);
|
||||
|
||||
bool nir_move_vec_src_uses_to_dest(nir_shader *shader, bool skip_const_srcs);
|
||||
bool nir_move_output_stores_to_end(nir_shader *nir);
|
||||
bool nir_lower_vec_to_regs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
|
||||
|
|
|
|||
160
src/compiler/nir/nir_opt_move_to_top.c
Normal file
160
src/compiler/nir/nir_opt_move_to_top.c
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
/*
|
||||
* Copyright 2025 Advanced Micro Devices, Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
/* This pass moves intrinsics to the beginning of the shader. If an intrinsic
|
||||
* is non-movable, it's left as-is.
|
||||
*
|
||||
* The pass can move intrinsics, ALU, load_const, and undef to the top.
|
||||
* The last 3 instruction types are only moved to the top when their results
|
||||
* are used as sources by moved instructions. It preserves the relative order
|
||||
* of instructions that are moved.
|
||||
*
|
||||
* Used either as a scheduling optimization or to accommodate hw or compiler
|
||||
* backend limitations. You would typically use this if you don't use
|
||||
* nir_lower_io_vars_to_temporaries and want to move input loads to top,
|
||||
* but note that such global code motion passes often increase register usage.
|
||||
*/
|
||||
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
typedef struct {
|
||||
nir_opt_move_to_top_options options;
|
||||
nir_function_impl *impl;
|
||||
} opt_move_to_top_state;
|
||||
|
||||
/* Bits kept in nir_instr::pass_flags while the pass runs.
 *
 * CAN_MOVE and CANT_MOVE cache the movability verdict for an instruction and
 * are mutually exclusive. MOVED marks instructions that are already at their
 * final position (either moved by this pass, or found in the entry block
 * when nir_move_to_entry_block_only is set).
 */
#define PASS_FLAG_CAN_MOVE  BITFIELD_BIT(0)
#define PASS_FLAG_CANT_MOVE BITFIELD_BIT(1)
#define PASS_FLAG_MOVED     BITFIELD_BIT(2)
|
||||
|
||||
static bool
|
||||
can_move_src_to_top(nir_src *src, void *_state)
|
||||
{
|
||||
opt_move_to_top_state *state = (opt_move_to_top_state *)_state;
|
||||
nir_instr *instr = src->ssa->parent_instr;
|
||||
|
||||
assert(util_bitcount(instr->pass_flags & (PASS_FLAG_CANT_MOVE |
|
||||
PASS_FLAG_CAN_MOVE)) <= 1);
|
||||
|
||||
if (instr->pass_flags & PASS_FLAG_CANT_MOVE)
|
||||
return false;
|
||||
if (instr->pass_flags & PASS_FLAG_CAN_MOVE)
|
||||
return true;
|
||||
|
||||
/* If the instruction is already in the entry block, there is nothing to do. */
|
||||
if (state->options & nir_move_to_entry_block_only &&
|
||||
instr->block == nir_start_block(state->impl)) {
|
||||
/* Mark as already moved. */
|
||||
instr->pass_flags |= PASS_FLAG_CAN_MOVE | PASS_FLAG_MOVED;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (instr->type != nir_instr_type_alu &&
|
||||
instr->type != nir_instr_type_intrinsic &&
|
||||
instr->type != nir_instr_type_load_const &&
|
||||
instr->type != nir_instr_type_undef) {
|
||||
instr->pass_flags |= PASS_FLAG_CANT_MOVE;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (instr->type == nir_instr_type_intrinsic) {
|
||||
/* Only these intrinsics are movable to the top. */
|
||||
switch (nir_instr_as_intrinsic(instr)->intrinsic) {
|
||||
/* Input loads and its sources. */
|
||||
case nir_intrinsic_load_barycentric_pixel:
|
||||
case nir_intrinsic_load_barycentric_centroid:
|
||||
case nir_intrinsic_load_barycentric_sample:
|
||||
case nir_intrinsic_load_barycentric_at_offset:
|
||||
case nir_intrinsic_load_barycentric_at_sample:
|
||||
case nir_intrinsic_load_input:
|
||||
case nir_intrinsic_load_interpolated_input:
|
||||
case nir_intrinsic_load_per_primitive_input:
|
||||
case nir_intrinsic_load_per_vertex_input:
|
||||
/* load_smem_amd and its sources. */
|
||||
case nir_intrinsic_load_scalar_arg_amd:
|
||||
case nir_intrinsic_load_smem_amd:
|
||||
break;
|
||||
default:
|
||||
instr->pass_flags |= PASS_FLAG_CANT_MOVE;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!nir_foreach_src(instr, can_move_src_to_top, state)) {
|
||||
instr->pass_flags |= PASS_FLAG_CANT_MOVE;
|
||||
return false;
|
||||
}
|
||||
|
||||
instr->pass_flags |= PASS_FLAG_CAN_MOVE;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
move_src(nir_src *src, void *_state)
|
||||
{
|
||||
nir_instr *instr = src->ssa->parent_instr;
|
||||
nir_builder *b = (nir_builder *)_state;
|
||||
|
||||
if (instr->pass_flags & PASS_FLAG_MOVED)
|
||||
return true; /* already moved */
|
||||
|
||||
nir_foreach_src(instr, move_src, b);
|
||||
nir_instr_move(b->cursor, instr);
|
||||
b->cursor = nir_after_instr(instr);
|
||||
instr->pass_flags |= PASS_FLAG_MOVED;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
handle_load(nir_builder *b, nir_intrinsic_instr *intr, void *_state)
|
||||
{
|
||||
opt_move_to_top_state *state = (opt_move_to_top_state *)_state;
|
||||
bool move = false;
|
||||
|
||||
if (state->options & nir_move_to_entry_block_only &&
|
||||
intr->instr.block == nir_start_block(b->impl))
|
||||
return false;
|
||||
|
||||
/* If an intrinsic has a destination and it has IO semantics, it's
|
||||
* an input load. The specific intrinsics that are moved are
|
||||
* listed in can_move_src_to_top.
|
||||
*/
|
||||
move |= state->options & nir_move_to_top_input_loads &&
|
||||
nir_intrinsic_has_io_semantics(intr) &&
|
||||
nir_intrinsic_infos[intr->intrinsic].has_dest &&
|
||||
!nir_is_output_load(intr);
|
||||
|
||||
move |= state->options & nir_move_to_top_load_smem_amd &&
|
||||
intr->intrinsic == nir_intrinsic_load_smem_amd;
|
||||
|
||||
if (!move)
|
||||
return false;
|
||||
|
||||
nir_src intr_as_src = nir_src_for_ssa(&intr->def);
|
||||
|
||||
/* Initialize the cursor only once per function. */
|
||||
if (state->impl != b->impl) {
|
||||
if (state->options & nir_move_to_entry_block_only)
|
||||
b->cursor = nir_after_block(nir_start_block(b->impl));
|
||||
else
|
||||
b->cursor = nir_before_impl(b->impl);
|
||||
state->impl = b->impl;
|
||||
}
|
||||
|
||||
if (!can_move_src_to_top(&intr_as_src, state))
|
||||
return false;
|
||||
|
||||
move_src(&intr_as_src, b);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
nir_opt_move_to_top(nir_shader *nir, nir_opt_move_to_top_options options)
|
||||
{
|
||||
nir_shader_clear_pass_flags(nir);
|
||||
opt_move_to_top_state state = {options};
|
||||
return nir_shader_intrinsics_pass(nir, handle_load, nir_metadata_none,
|
||||
&state);
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue