nir: add a pass that moves output stores to the end of the shader

This is required by vc4 and vc5 in order to merge the rest of the lowered IO code for st/mesa.

Acked-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33053>
This commit is contained in:
Marek Olšák 2025-01-16 00:56:41 -05:00 committed by Marge Bot
parent 15ada70677
commit b65973240c
3 changed files with 100 additions and 0 deletions

View file

@ -218,6 +218,7 @@ files_libnir = files(
'nir_lower_sysvals_to_varyings.c',
'nir_metadata.c',
'nir_mod_analysis.c',
'nir_move_output_stores_to_end.c',
'nir_move_vec_src_uses_to_dest.c',
'nir_normalize_cubemap_coords.c',
'nir_opt_access.c',

View file

@ -6070,6 +6070,7 @@ bool nir_clear_shared_memory(nir_shader *shader,
const unsigned chunk_size);
bool nir_move_vec_src_uses_to_dest(nir_shader *shader, bool skip_const_srcs);
bool nir_move_output_stores_to_end(nir_shader *nir);
bool nir_lower_vec_to_regs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
const void *_data);
bool nir_lower_alpha_test(nir_shader *shader, enum compare_func func,

View file

@ -0,0 +1,98 @@
/*
* Copyright © 2025 Advanced Micro Devices, Inc.
* SPDX-License-Identifier: MIT
*/
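/* This pass moves output stores to the end of the shader.
*
* Only stores that can be moved trivially are moved, i.e. stores whose output
* components are each written exactly once. Stores to components that are
* written more than once are left where they are.
*/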
#include "nir.h"
#include "nir_builder.h"
/* Put the position in the last slot to make its store last.
*
* LAST_SLOT is an extra location index placed after all real varying
* locations (real locations are asserted to be < NUM_TOTAL_VARYING_SLOTS).
* Position stores are remapped to it when they are gathered.
*/
#define LAST_SLOT NUM_TOTAL_VARYING_SLOTS
/* Number of tracked entries: 4 components for every location up to and
* including LAST_SLOT. An entry index is "location * 4 + component".
*/
#define NUM_SLOTS ((LAST_SLOT + 1) * 4)
/* State shared between the gathering callback and the pass entry point. */
typedef struct {
/* Candidate store instruction, indexed by the entry of the first component
* that the store writes. NULL when nothing is recorded for that entry.
*/
nir_instr *stores[NUM_SLOTS];
/* Whether the output component is written only once or multiple times.
* One bit per entry; only entries whose "single" bit is set are considered
* when the stores are moved.
*/
BITSET_DECLARE(single, NUM_SLOTS);
BITSET_DECLARE(multiple, NUM_SLOTS);
} output_stores_state;
/* nir_shader_intrinsics_pass callback that records which store_output
 * intrinsics may be moved to the end of the shader.
 *
 * A store is recorded in state->stores[] (keyed by the entry of its first
 * component) only if none of the components it writes is written by any
 * other store seen so far. When a later store overlaps an already recorded
 * one, the recorded store is dropped again, so that moving stores can never
 * reorder two writes to the same output component.
 *
 * \param b       Unused.
 * \param intr    The intrinsic being visited.
 * \param opaque  Pointer to the output_stores_state being filled in.
 * \return Always false: gathering never modifies the shader.
 */
static bool
gather_output_stores(struct nir_builder *b, nir_intrinsic_instr *intr,
                     void *opaque)
{
   output_stores_state *state = (output_stores_state *)opaque;

   if (intr->intrinsic != nir_intrinsic_store_output)
      return false;

   unsigned location = nir_intrinsic_io_semantics(intr).location;
   unsigned component = nir_intrinsic_component(intr);
   unsigned num_components = intr->src[0].ssa->num_components;

   assert(location < NUM_TOTAL_VARYING_SLOTS);
   assert(component < 4);
   /* The per-location tracking below, the size of the state arrays and
    * BITSET_SET_RANGE_INSIDE_WORD all rely on a store never spilling into
    * the next location.
    */
   assert(component + num_components <= 4);
   assert(!nir_intrinsic_io_semantics(intr).high_16bits);

   /* Stores must be in the top level block. */
   assert(intr->instr.block->cf_node.parent->type == nir_cf_node_function);

   /* Put the position in the last slot to make its store last. */
   if (location == VARYING_SLOT_POS)
      location = LAST_SLOT;

   const unsigned base = location * 4;
   const unsigned slot = base + component;
   const unsigned end = slot + num_components; /* exclusive */

   bool multiple = false;

   /* Drop every previously recorded store that overlaps this one. Recorded
    * stores are keyed by their first component, so an overlapping store can
    * start anywhere between the beginning of the location and the last
    * component written here.
    */
   for (unsigned s = base; s < end; s++) {
      nir_instr *prev = state->stores[s];
      if (!prev)
         continue;

      unsigned prev_num_components =
         nir_instr_as_intrinsic(prev)->src[0].ssa->num_components;

      /* The earlier store ends before this one begins. */
      if (s + prev_num_components <= slot)
         continue;

      state->stores[s] = NULL;
      for (unsigned k = 0; k < prev_num_components; k++) {
         BITSET_CLEAR(state->single, s + k);
         BITSET_SET(state->multiple, s + k);
      }
      multiple = true;
   }

   /* Each component must be written only once. Check every component of
    * the store, not just the first one.
    */
   for (unsigned i = 0; i < num_components; i++) {
      if (BITSET_TEST(state->multiple, slot + i))
         multiple = true;
   }

   if (multiple) {
      /* Remember that these components have several writers so that later
       * stores to any of them are left in place as well.
       */
      for (unsigned i = 0; i < num_components; i++)
         BITSET_SET(state->multiple, slot + i);
      return false;
   }

   state->stores[slot] = &intr->instr;
   BITSET_SET_RANGE_INSIDE_WORD(state->single, slot, end - 1);
   return false;
}
/* Move output stores to the end of the shader entrypoint.
 *
 * Only vertex and tessellation evaluation shaders are accepted (asserted).
 * The shader is first scanned with gather_output_stores(); every store that
 * was recorded for an entry whose "single" bit is set is then re-inserted
 * at the very end of the entrypoint, visiting entries in bitset order so
 * that the position store (remapped to the last slot) comes last.
 *
 * \param nir  The shader to process.
 * \return true if at least one store was moved.
 */
bool
nir_move_output_stores_to_end(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_VERTEX ||
          nir->info.stage == MESA_SHADER_TESS_EVAL);

   output_stores_state gathered;
   memset(&gathered, 0, sizeof(gathered));

   /* First pass: find out which stores are candidates for moving. */
   nir_shader_intrinsics_pass(nir, gather_output_stores, nir_metadata_all,
                              &gathered);

   /* Second pass: append every candidate to the end of the entrypoint. */
   nir_function_impl *entry = nir_shader_get_entrypoint(nir);
   bool moved_any = false;
   unsigned slot;

   BITSET_FOREACH_SET(slot, gathered.single, NUM_SLOTS) {
      nir_instr *store = gathered.stores[slot];

      /* Only the first component of a store has an instruction recorded. */
      if (store) {
         nir_instr_remove(store);
         nir_instr_insert(nir_after_impl(entry), store);
         moved_any = true;
      }
   }

   /* Moving instructions keeps control flow intact; if nothing moved,
    * all metadata is still valid.
    */
   if (moved_any)
      nir_metadata_preserve(entry, nir_metadata_control_flow);
   else
      nir_metadata_preserve(entry, nir_metadata_all);

   return moved_any;
}