mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 14:20:11 +01:00
nir: add new pass nir_lower_io_indirect_loads
This is a partial replacement for nir_lower_io_vars_to_temporaries. It supports all input and output loads. It doesn't handle stores. The motivation is to improve compile times. The main differences compared to nir_lower_io_vars_to_temporaries are: - it only lowers indirect loads to temps and doesn't touch direct loads which improves compile times and removes the need for nir_lower_vars_to_ssa afterward because indirect temp access can't be lowered to SSA - it doesn't move all input loads to the top; it only moves those input loads to the top whose indirect loads are lowered (which improves register usage because direct loads are not moved) - it doesn't have to deal with complexities of variables Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36018>
This commit is contained in:
parent
7aba9b3ebe
commit
3dd9a9782b
4 changed files with 344 additions and 0 deletions
|
|
@ -189,6 +189,7 @@ else
|
|||
'nir_lower_int_to_float.c',
|
||||
'nir_lower_io.c',
|
||||
'nir_lower_io_array_vars_to_elements.c',
|
||||
'nir_lower_io_indirect_loads.c',
|
||||
'nir_lower_io_vars_to_temporaries.c',
|
||||
'nir_lower_io_to_scalar.c',
|
||||
'nir_lower_io_vars_to_scalar.c',
|
||||
|
|
|
|||
|
|
@ -5053,6 +5053,7 @@ bool nir_lower_io(nir_shader *shader,
|
|||
bool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes);
|
||||
void nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs);
|
||||
bool nir_io_add_intrinsic_xfb_info(nir_shader *nir);
|
||||
bool nir_lower_io_indirect_loads(nir_shader *nir, nir_variable_mode modes);
|
||||
|
||||
bool
|
||||
nir_lower_vars_to_explicit_types(nir_shader *shader,
|
||||
|
|
@ -5240,6 +5241,7 @@ nir_src *nir_get_io_index_src(nir_intrinsic_instr *instr);
|
|||
nir_src *nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr);
|
||||
nir_src *nir_get_shader_call_payload_src(nir_intrinsic_instr *call);
|
||||
|
||||
bool nir_is_output_load(nir_intrinsic_instr *intr);
|
||||
bool nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage);
|
||||
|
||||
bool nir_lower_reg_intrinsics_to_ssa_impl(nir_function_impl *impl);
|
||||
|
|
|
|||
|
|
@ -1011,6 +1011,15 @@ nir_get_io_arrayed_index_src_number(const nir_intrinsic_instr *instr)
|
|||
}
|
||||
}
|
||||
|
||||
bool
|
||||
nir_is_output_load(nir_intrinsic_instr *intr)
|
||||
{
|
||||
return intr->intrinsic == nir_intrinsic_load_output ||
|
||||
intr->intrinsic == nir_intrinsic_load_per_vertex_output ||
|
||||
intr->intrinsic == nir_intrinsic_load_per_primitive_output ||
|
||||
intr->intrinsic == nir_intrinsic_load_per_view_output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the array index source for an arrayed load/store intrinsic.
|
||||
*/
|
||||
|
|
|
|||
332
src/compiler/nir/nir_lower_io_indirect_loads.c
Normal file
332
src/compiler/nir/nir_lower_io_indirect_loads.c
Normal file
|
|
@ -0,0 +1,332 @@
|
|||
/*
|
||||
* Copyright 2025 Advanced Micro Devices, Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
/* This pass expects IO intrinsics (load_input/load_output/...) and lowers
|
||||
* loads with indirect slot indexing to temp indexing. Supported cases:
|
||||
* - VS, TCS, TES, GS, FS inputs
|
||||
* - TCS outputs
|
||||
*
|
||||
* Indirect loads are replaced with direct loads whose results are stored in
|
||||
* a temp array, and the original load is replaced with an indirect load from
|
||||
* the temp array.
|
||||
*
|
||||
* Direct loads are typically inserted at the beginning of the shader, and only
|
||||
* the temp array loads are inserted at the place of the original indirect
|
||||
* load.
|
||||
*
|
||||
* The exceptions are TCS output loads and interpolate_at_* loads where
|
||||
* the direct loads are always inserted at the place of the original indirect
|
||||
* load.
|
||||
*/
|
||||
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
typedef struct {
|
||||
nir_variable *var[INTERP_MODE_COUNT];
|
||||
nir_variable *centroid[INTERP_MODE_COUNT];
|
||||
nir_variable *sample[INTERP_MODE_COUNT];
|
||||
unsigned first_comp;
|
||||
unsigned last_comp;
|
||||
bool declared;
|
||||
} var_info;
|
||||
|
||||
typedef struct {
|
||||
nir_variable_mode modes;
|
||||
|
||||
/* If arrays are loaded only once at the beginning, these are the local
|
||||
* variables.
|
||||
*/
|
||||
var_info input[NUM_TOTAL_VARYING_SLOTS];
|
||||
var_info input_hi[NUM_TOTAL_VARYING_SLOTS];
|
||||
var_info output[NUM_TOTAL_VARYING_SLOTS];
|
||||
var_info output_hi[NUM_TOTAL_VARYING_SLOTS];
|
||||
} lower_io_indir_loads_state;
|
||||
|
||||
static bool
|
||||
is_compact(nir_shader *nir, nir_intrinsic_instr *intr)
|
||||
{
|
||||
if (!nir->options->compact_arrays ||
|
||||
(nir->info.stage == MESA_SHADER_VERTEX && !nir_is_output_load(intr)))
|
||||
return false;
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
return (sem.location >= VARYING_SLOT_CLIP_DIST0 &&
|
||||
sem.location <= VARYING_SLOT_CULL_DIST1) ||
|
||||
(nir->info.stage == MESA_SHADER_TESS_CTRL &&
|
||||
(sem.location == VARYING_SLOT_TESS_LEVEL_INNER ||
|
||||
sem.location == VARYING_SLOT_TESS_LEVEL_OUTER));
|
||||
}
|
||||
|
||||
static var_info *
|
||||
get_load_var(nir_intrinsic_instr *intr, lower_io_indir_loads_state *state)
|
||||
{
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
bool is_output = nir_is_output_load(intr);
|
||||
bool high = sem.high_dvec2 || sem.high_16bits;
|
||||
|
||||
return is_output ? (high ? &state->output_hi[sem.location] :
|
||||
&state->output[sem.location]) :
|
||||
(high ? &state->input_hi[sem.location] :
|
||||
&state->input[sem.location]);
|
||||
}
|
||||
|
||||
static nir_variable **
|
||||
get_load_once_variable(gl_shader_stage stage, nir_intrinsic_instr *intr,
|
||||
lower_io_indir_loads_state *state)
|
||||
{
|
||||
if (intr->intrinsic == nir_intrinsic_load_interpolated_input) {
|
||||
/* FS input arrays are reloaded at every interpolate_at_offset/at_sample
|
||||
* because we assume that the same inputs can also be interpolated at
|
||||
* pixel/centroid.
|
||||
*/
|
||||
nir_intrinsic_instr *baryc = nir_src_as_intrinsic(intr->src[0]);
|
||||
if (!baryc)
|
||||
return false;
|
||||
|
||||
enum glsl_interp_mode interp = nir_intrinsic_interp_mode(baryc);
|
||||
|
||||
switch (baryc->intrinsic) {
|
||||
case nir_intrinsic_load_barycentric_pixel:
|
||||
return &get_load_var(intr, state)->var[interp];
|
||||
case nir_intrinsic_load_barycentric_centroid:
|
||||
return &get_load_var(intr, state)->centroid[interp];
|
||||
case nir_intrinsic_load_barycentric_sample:
|
||||
return &get_load_var(intr, state)->sample[interp];
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
} else if (intr->intrinsic == nir_intrinsic_load_per_vertex_input ||
|
||||
intr->intrinsic == nir_intrinsic_load_input_vertex) {
|
||||
/* The per-vertex input is loaded at the beginning (not in place) only
|
||||
* if the vertex index is constant.
|
||||
*/
|
||||
if (!nir_scalar_is_const(nir_scalar_resolved(intr->src[0].ssa, 0)))
|
||||
return NULL;
|
||||
} else if (stage == MESA_SHADER_TESS_CTRL && nir_is_output_load(intr)) {
|
||||
/* TCS output arrays are reloaded at every indirect load. */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Other inputs can be loaded at the beginning. */
|
||||
return &get_load_var(intr, state)->var[0];
|
||||
}
|
||||
|
||||
static bool
|
||||
gather_indirect_inputs(struct nir_builder *b, nir_intrinsic_instr *intr,
|
||||
void *data)
|
||||
{
|
||||
if (!nir_intrinsic_has_io_semantics(intr) ||
|
||||
!nir_intrinsic_infos[intr->intrinsic].has_dest ||
|
||||
nir_is_output_load(intr))
|
||||
return false;
|
||||
|
||||
lower_io_indir_loads_state *state = (lower_io_indir_loads_state *)data;
|
||||
unsigned component = nir_intrinsic_component(intr);
|
||||
|
||||
var_info *var = get_load_var(intr, state);
|
||||
unsigned first_comp = component + nir_def_first_component_read(&intr->def);
|
||||
unsigned last_comp = component + nir_def_last_component_read(&intr->def);
|
||||
|
||||
if (!var->declared) {
|
||||
var->declared = true;
|
||||
var->first_comp = first_comp;
|
||||
var->last_comp = last_comp;
|
||||
} else {
|
||||
var->first_comp = MIN2(var->first_comp, first_comp);
|
||||
var->last_comp = MAX2(var->last_comp, last_comp);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_load(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
if (!nir_intrinsic_has_io_semantics(intr) ||
|
||||
!nir_intrinsic_infos[intr->intrinsic].has_dest)
|
||||
return false;
|
||||
|
||||
lower_io_indir_loads_state *state = (lower_io_indir_loads_state *)data;
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
bool is_output = nir_is_output_load(intr);
|
||||
bool compact = is_compact(b->shader, intr);
|
||||
bool progress = false;
|
||||
|
||||
/* Lower indirect loads. This works for all loads (input, output,
|
||||
* interpolated) because we don't care what the load is. We just duplicate
|
||||
* it for each slot and set the offset/location.
|
||||
*/
|
||||
if (state->modes & (is_output ? nir_var_shader_out : nir_var_shader_in)) {
|
||||
nir_scalar array_index =
|
||||
nir_scalar_resolved(nir_get_io_offset_src(intr)->ssa, 0);
|
||||
|
||||
if (!nir_scalar_is_const(array_index)) {
|
||||
nir_variable *temp = NULL;
|
||||
nir_variable **array =
|
||||
get_load_once_variable(b->shader->info.stage, intr, state);
|
||||
unsigned first_comp, last_comp;
|
||||
unsigned intr_component = nir_intrinsic_component(intr);
|
||||
|
||||
/* Load the array components that are actually used. If we load
|
||||
* them at the beginning, load all that are read anywhere
|
||||
* in the shader.
|
||||
*/
|
||||
if (array) {
|
||||
var_info *var = get_load_var(intr, state);
|
||||
assert(var->declared);
|
||||
first_comp = var->first_comp;
|
||||
last_comp = var->last_comp;
|
||||
} else {
|
||||
first_comp = intr_component + nir_def_first_component_read(&intr->def);
|
||||
last_comp = intr_component + nir_def_last_component_read(&intr->def);
|
||||
}
|
||||
|
||||
/* Some arrays are loaded only once at the beginning of the shader,
|
||||
* others are loaded at every indirect load (such as TCS output loads).
|
||||
*/
|
||||
if (!array || !*array) {
|
||||
nir_def **elems = alloca(sizeof(nir_def*) * sem.num_slots);
|
||||
unsigned num_comp = last_comp - first_comp + 1;
|
||||
|
||||
if (array)
|
||||
b->cursor = nir_before_impl(b->impl);
|
||||
else
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
nir_def *zero = nir_imm_int(b, 0);
|
||||
nir_def *src0 = NULL;
|
||||
|
||||
/* Load the barycentrics if we are loading inputs at the beginning. */
|
||||
if (array &&
|
||||
intr->intrinsic == nir_intrinsic_load_interpolated_input) {
|
||||
nir_intrinsic_instr *baryc_intr = nir_src_as_intrinsic(intr->src[0]);
|
||||
|
||||
src0 = nir_load_barycentric(b, baryc_intr->intrinsic,
|
||||
nir_intrinsic_interp_mode(baryc_intr));
|
||||
}
|
||||
|
||||
/* Load the vertex index at the beginning if it's constant. */
|
||||
if (array &&
|
||||
(intr->intrinsic == nir_intrinsic_load_per_vertex_input ||
|
||||
intr->intrinsic == nir_intrinsic_load_input_vertex)) {
|
||||
nir_scalar s = nir_scalar_resolved(intr->src[0].ssa, 0);
|
||||
assert(nir_scalar_is_const(s));
|
||||
src0 = nir_imm_int(b, nir_scalar_as_uint(s));
|
||||
}
|
||||
|
||||
/* Load individual array elements. */
|
||||
for (unsigned i = 0; i < sem.num_slots; i++) {
|
||||
/* Create a new load for each slot. */
|
||||
nir_intrinsic_instr *new_load =
|
||||
nir_intrinsic_instr_create(b->shader, intr->intrinsic);
|
||||
new_load->num_components = num_comp;
|
||||
nir_def_init(&new_load->instr, &new_load->def,
|
||||
num_comp, intr->def.bit_size);
|
||||
nir_intrinsic_copy_const_indices(new_load, intr);
|
||||
|
||||
/* Set the same srcs .. */
|
||||
for (unsigned src = 0;
|
||||
src < nir_intrinsic_infos[intr->intrinsic].num_srcs; src++)
|
||||
new_load->src[src] = nir_src_for_ssa(intr->src[src].ssa);
|
||||
|
||||
/* .. but change the indirect index to 0. */
|
||||
new_load->src[nir_get_io_offset_src_number(intr)] =
|
||||
nir_src_for_ssa(zero);
|
||||
|
||||
nir_intrinsic_set_component(new_load, first_comp);
|
||||
|
||||
/* Set barycentrics or the vertex index if we are loading inputs
|
||||
* at the beginning.
|
||||
*/
|
||||
if (src0)
|
||||
new_load->src[0] = nir_src_for_ssa(src0);
|
||||
|
||||
/* and set IO semantics to the location of the array element. */
|
||||
nir_io_semantics new_sem = sem;
|
||||
new_sem.num_slots = 1;
|
||||
|
||||
if (!compact) {
|
||||
new_sem.location += i;
|
||||
nir_intrinsic_set_base(new_load, nir_intrinsic_base(intr) + i);
|
||||
} else {
|
||||
new_sem.location += i / 4;
|
||||
nir_intrinsic_set_component(new_load, i % 4);
|
||||
nir_intrinsic_set_base(new_load, nir_intrinsic_base(intr) + i / 4);
|
||||
}
|
||||
|
||||
nir_intrinsic_set_io_semantics(new_load, new_sem);
|
||||
|
||||
nir_builder_instr_insert(b, &new_load->instr);
|
||||
elems[i] = &new_load->def;
|
||||
}
|
||||
|
||||
/* Put the array elements into a local array variable. */
|
||||
enum glsl_base_type base_type =
|
||||
intr->def.bit_size == 16 ? GLSL_TYPE_FLOAT16 : GLSL_TYPE_FLOAT;
|
||||
const glsl_type *type =
|
||||
glsl_array_type(glsl_vector_type(base_type, num_comp),
|
||||
sem.num_slots, 0);
|
||||
if (!array)
|
||||
array = &temp; /* loaded at the load instead of the beginning */
|
||||
*array = nir_local_variable_create(b->impl, type, "");
|
||||
|
||||
/* Fill the array with the loaded elements. */
|
||||
for (unsigned i = 0; i < sem.num_slots; i++) {
|
||||
nir_store_array_var_imm(b, *array, i, elems[i],
|
||||
BITFIELD_MASK(num_comp));
|
||||
}
|
||||
}
|
||||
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
/* Get the indirect value and upsize the vector to the original size. */
|
||||
nir_def *value = nir_load_array_var(b, *array, array_index.def);
|
||||
|
||||
value = nir_shift_channels(b, value,
|
||||
(int)first_comp - (int)intr_component,
|
||||
intr->def.num_components);
|
||||
|
||||
nir_def_replace(&intr->def, value);
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_indirect_loads(nir_function_impl *impl, nir_variable_mode modes)
|
||||
{
|
||||
lower_io_indir_loads_state *state = calloc(1, sizeof(*state));
|
||||
state->modes = modes;
|
||||
|
||||
if (modes & nir_var_shader_in) {
|
||||
nir_function_intrinsics_pass(impl, gather_indirect_inputs,
|
||||
nir_metadata_all, state);
|
||||
}
|
||||
|
||||
bool progress = nir_function_intrinsics_pass(impl, lower_load,
|
||||
nir_metadata_control_flow, state);
|
||||
free(state);
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
nir_lower_io_indirect_loads(nir_shader *nir, nir_variable_mode modes)
|
||||
{
|
||||
assert(modes & (nir_var_shader_in | nir_var_shader_out));
|
||||
assert(!(modes & ~(nir_var_shader_in | nir_var_shader_out)));
|
||||
assert((!(modes & nir_var_shader_out) ||
|
||||
nir->info.stage == MESA_SHADER_TESS_CTRL) &&
|
||||
"output lowering only supported for TCS");
|
||||
assert(nir->info.stage <= MESA_SHADER_FRAGMENT);
|
||||
|
||||
bool progress = false;
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
progress |= lower_indirect_loads(impl, modes);
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue