The naming is a bit confusing no matter how you look at it. Within SPIR-V, "global" memory is memory accessible from all threads. GLSL "global" memory normally refers to shader-thread-private memory declared at global scope. As we already use "shared" for memory shared across all threads of a work group, the solution everybody could be happy with is to rename "global" to "private" and use "global" later for memory usually stored within system-accessible memory (be it VRAM or system RAM, if keeping SVM in mind). GLSL "local" memory is memory only accessible within a function, while SPIR-V "local" memory is memory accessible within the same workgroup.

v2: rename local to function as well
v3: rename vtn_variable_mode_local as well

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
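Editor's note (inferred from the message above and the identifiers used in the code below, not an authoritative statement of the patch contents): the rename maps the old NIR variable modes roughly as

    nir_var_global -> nir_var_private    (thread-private memory declared at global scope)
    nir_var_local  -> nir_var_function   (memory local to a single function)

leaving nir_var_shared for memory shared across the threads of a work group, and freeing the name "global" for future system-accessible memory.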
/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "nir_vla.h"

#include "util/u_math.h"

|
struct split_var_state {
|
|
void *mem_ctx;
|
|
|
|
nir_shader *shader;
|
|
nir_function_impl *impl;
|
|
|
|
nir_variable *base_var;
|
|
};
|
|
|
|
struct field {
|
|
struct field *parent;
|
|
|
|
const struct glsl_type *type;
|
|
|
|
unsigned num_fields;
|
|
struct field *fields;
|
|
|
|
nir_variable *var;
|
|
};
|
|
|
|
static const struct glsl_type *
|
|
wrap_type_in_array(const struct glsl_type *type,
|
|
const struct glsl_type *array_type)
|
|
{
|
|
if (!glsl_type_is_array(array_type))
|
|
return type;
|
|
|
|
const struct glsl_type *elem_type =
|
|
wrap_type_in_array(type, glsl_get_array_element(array_type));
|
|
assert(glsl_get_explicit_stride(array_type) == 0);
|
|
return glsl_array_type(elem_type, glsl_get_length(array_type), 0);
|
|
}
|
|
|
|
static int
|
|
num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
|
|
{
|
|
int num_levels = 0;
|
|
while (true) {
|
|
if (glsl_type_is_array_or_matrix(type)) {
|
|
num_levels++;
|
|
type = glsl_get_array_element(type);
|
|
} else if (glsl_type_is_vector_or_scalar(type)) {
|
|
return num_levels;
|
|
} else {
|
|
/* Not an array of vectors */
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
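/* Editor's note: a few illustrative cases for the helper above, inferred
 * from the code rather than from upstream documentation: a two-dimensional
 * array of vec4 has two levels, a mat4 has one level (a matrix counts as an
 * array of its column vectors), and any type containing a struct returns -1.
 */
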
static void
init_field_for_type(struct field *field, struct field *parent,
                    const struct glsl_type *type,
                    const char *name,
                    struct split_var_state *state)
{
   *field = (struct field) {
      .parent = parent,
      .type = type,
   };

   const struct glsl_type *struct_type = glsl_without_array(type);
   if (glsl_type_is_struct(struct_type)) {
      field->num_fields = glsl_get_length(struct_type),
      field->fields = ralloc_array(state->mem_ctx, struct field,
                                   field->num_fields);
      for (unsigned i = 0; i < field->num_fields; i++) {
         char *field_name = NULL;
         if (name) {
            field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
                                         glsl_get_struct_elem_name(struct_type, i));
         } else {
            field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
                                         glsl_get_type_name(struct_type),
                                         glsl_get_struct_elem_name(struct_type, i));
         }
         init_field_for_type(&field->fields[i], field,
                             glsl_get_struct_field(struct_type, i),
                             field_name, state);
      }
   } else {
      const struct glsl_type *var_type = type;
      for (struct field *f = field->parent; f; f = f->parent)
         var_type = wrap_type_in_array(var_type, f->type);

      nir_variable_mode mode = state->base_var->data.mode;
      if (mode == nir_var_function) {
         field->var = nir_local_variable_create(state->impl, var_type, name);
      } else {
         field->var = nir_variable_create(state->shader, mode, var_type, name);
      }
   }
}

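/* Editor's note: an example of what init_field_for_type() produces for a
 * hypothetical variable
 *
 *    struct { vec4 color; float depth; } s[2];
 *
 * The two leaf fields become new variables named "s_color" and "s_depth",
 * and wrap_type_in_array() re-applies the enclosing array level so their
 * types are vec4[2] and float[2] respectively.
 */
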
static bool
split_var_list_structs(nir_shader *shader,
                       nir_function_impl *impl,
                       struct exec_list *vars,
                       struct hash_table *var_field_map,
                       void *mem_ctx)
{
   struct split_var_state state = {
      .mem_ctx = mem_ctx,
      .shader = shader,
      .impl = impl,
   };

   struct exec_list split_vars;
   exec_list_make_empty(&split_vars);

   /* To avoid list confusion (we'll be adding things as we split variables),
    * pull all of the variables we plan to split off of the list
    */
   nir_foreach_variable_safe(var, vars) {
      if (!glsl_type_is_struct(glsl_without_array(var->type)))
         continue;

      exec_node_remove(&var->node);
      exec_list_push_tail(&split_vars, &var->node);
   }

   nir_foreach_variable(var, &split_vars) {
      state.base_var = var;

      struct field *root_field = ralloc(mem_ctx, struct field);
      init_field_for_type(root_field, NULL, var->type, var->name, &state);
      _mesa_hash_table_insert(var_field_map, var, root_field);
   }

   return !exec_list_is_empty(&split_vars);
}

static void
split_struct_derefs_impl(nir_function_impl *impl,
                         struct hash_table *var_field_map,
                         nir_variable_mode modes,
                         void *mem_ctx)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_deref)
            continue;

         nir_deref_instr *deref = nir_instr_as_deref(instr);
         if (!(deref->mode & modes))
            continue;

         /* Clean up any dead derefs we find lying around. They may refer to
          * variables we're planning to split.
          */
         if (nir_deref_instr_remove_if_unused(deref))
            continue;

         if (!glsl_type_is_vector_or_scalar(deref->type))
            continue;

         nir_variable *base_var = nir_deref_instr_get_variable(deref);
         struct hash_entry *entry =
            _mesa_hash_table_search(var_field_map, base_var);
         if (!entry)
            continue;

         struct field *root_field = entry->data;

         nir_deref_path path;
         nir_deref_path_init(&path, deref, mem_ctx);

         struct field *tail_field = root_field;
         for (unsigned i = 0; path.path[i]; i++) {
            if (path.path[i]->deref_type != nir_deref_type_struct)
               continue;

            assert(i > 0);
            assert(glsl_type_is_struct(path.path[i - 1]->type));
            assert(path.path[i - 1]->type ==
                   glsl_without_array(tail_field->type));

            tail_field = &tail_field->fields[path.path[i]->strct.index];
         }
         nir_variable *split_var = tail_field->var;

         nir_deref_instr *new_deref = NULL;
         for (unsigned i = 0; path.path[i]; i++) {
            nir_deref_instr *p = path.path[i];
            b.cursor = nir_after_instr(&p->instr);

            switch (p->deref_type) {
            case nir_deref_type_var:
               assert(new_deref == NULL);
               new_deref = nir_build_deref_var(&b, split_var);
               break;

            case nir_deref_type_array:
            case nir_deref_type_array_wildcard:
               new_deref = nir_build_deref_follower(&b, new_deref, p);
               break;

            case nir_deref_type_struct:
               /* Nothing to do; we're splitting structs */
               break;

            default:
               unreachable("Invalid deref type in path");
            }
         }

         assert(new_deref->type == deref->type);
         nir_ssa_def_rewrite_uses(&deref->dest.ssa,
                                  nir_src_for_ssa(&new_deref->dest.ssa));
         nir_deref_instr_remove_if_unused(deref);
      }
   }
}

/** A pass for splitting structs into multiple variables
 *
 * This pass splits arrays of structs into multiple variables, one for each
 * (possibly nested) structure member. After this pass completes, no
 * variables of the given mode will contain a struct type.
 */
bool
nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
{
   void *mem_ctx = ralloc_context(NULL);
   struct hash_table *var_field_map =
      _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
                              _mesa_key_pointer_equal);

   assert((modes & (nir_var_private | nir_var_function)) == modes);

   bool has_global_splits = false;
   if (modes & nir_var_private) {
      has_global_splits = split_var_list_structs(shader, NULL,
                                                 &shader->globals,
                                                 var_field_map, mem_ctx);
   }

   bool progress = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool has_local_splits = false;
      if (modes & nir_var_function) {
         has_local_splits = split_var_list_structs(shader, function->impl,
                                                   &function->impl->locals,
                                                   var_field_map, mem_ctx);
      }

      if (has_global_splits || has_local_splits) {
         split_struct_derefs_impl(function->impl, var_field_map,
                                  modes, mem_ctx);

         nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                               nir_metadata_dominance);
         progress = true;
      }
   }

   ralloc_free(mem_ctx);

   return progress;
}

struct array_level_info {
   unsigned array_len;
   bool split;
};

struct array_split {
   /* Only set if this is the tail end of the splitting */
   nir_variable *var;

   unsigned num_splits;
   struct array_split *splits;
};

struct array_var_info {
   nir_variable *base_var;

   const struct glsl_type *split_var_type;

   bool split_var;
   struct array_split root_split;

   unsigned num_levels;
   struct array_level_info levels[0];
};

static bool
init_var_list_array_infos(struct exec_list *vars,
                          struct hash_table *var_info_map,
                          void *mem_ctx)
{
   bool has_array = false;

   nir_foreach_variable(var, vars) {
      int num_levels = num_array_levels_in_array_of_vector_type(var->type);
      if (num_levels <= 0)
         continue;

      struct array_var_info *info =
         rzalloc_size(mem_ctx, sizeof(*info) +
                               num_levels * sizeof(info->levels[0]));

      info->base_var = var;
      info->num_levels = num_levels;

      const struct glsl_type *type = var->type;
      for (int i = 0; i < num_levels; i++) {
         assert(glsl_get_explicit_stride(type) == 0);
         info->levels[i].array_len = glsl_get_length(type);
         type = glsl_get_array_element(type);

         /* All levels start out initially as split */
         info->levels[i].split = true;
      }

      _mesa_hash_table_insert(var_info_map, var, info);
      has_array = true;
   }

   return has_array;
}

static struct array_var_info *
get_array_var_info(nir_variable *var,
                   struct hash_table *var_info_map)
{
   struct hash_entry *entry =
      _mesa_hash_table_search(var_info_map, var);
   return entry ? entry->data : NULL;
}

static struct array_var_info *
get_array_deref_info(nir_deref_instr *deref,
                     struct hash_table *var_info_map,
                     nir_variable_mode modes)
{
   if (!(deref->mode & modes))
      return NULL;

   return get_array_var_info(nir_deref_instr_get_variable(deref),
                             var_info_map);
}

static void
mark_array_deref_used(nir_deref_instr *deref,
                      struct hash_table *var_info_map,
                      nir_variable_mode modes,
                      void *mem_ctx)
{
   struct array_var_info *info =
      get_array_deref_info(deref, var_info_map, modes);
   if (!info)
      return;

   nir_deref_path path;
   nir_deref_path_init(&path, deref, mem_ctx);

   /* Walk the path and look for indirects. If we have an array deref with an
    * indirect, mark the given level as not being split.
    */
   for (unsigned i = 0; i < info->num_levels; i++) {
      nir_deref_instr *p = path.path[i + 1];
      if (p->deref_type == nir_deref_type_array &&
          !nir_src_is_const(p->arr.index))
         info->levels[i].split = false;
   }
}

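/* Editor's note: a concrete case of the marking above. For a deref chain
 * like x[i][2] where i is not a constant, level 0 is marked as not split
 * because of the indirect, while level 1 keeps the initial split = true it
 * got in init_var_list_array_infos().
 */
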
static void
mark_array_usage_impl(nir_function_impl *impl,
                      struct hash_table *var_info_map,
                      nir_variable_mode modes,
                      void *mem_ctx)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_copy_deref:
            mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
                                  var_info_map, modes, mem_ctx);
            /* Fall Through */

         case nir_intrinsic_load_deref:
         case nir_intrinsic_store_deref:
            mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
                                  var_info_map, modes, mem_ctx);
            break;

         default:
            break;
         }
      }
   }
}

static void
create_split_array_vars(struct array_var_info *var_info,
                        unsigned level,
                        struct array_split *split,
                        const char *name,
                        nir_shader *shader,
                        nir_function_impl *impl,
                        void *mem_ctx)
{
   while (level < var_info->num_levels && !var_info->levels[level].split) {
      name = ralloc_asprintf(mem_ctx, "%s[*]", name);
      level++;
   }

   if (level == var_info->num_levels) {
      /* We add parens to the variable name so it looks like "(foo[2][*])" so
       * that further derefs will look like "(foo[2][*])[ssa_6]"
       */
      name = ralloc_asprintf(mem_ctx, "(%s)", name);

      nir_variable_mode mode = var_info->base_var->data.mode;
      if (mode == nir_var_function) {
         split->var = nir_local_variable_create(impl,
                                                var_info->split_var_type, name);
      } else {
         split->var = nir_variable_create(shader, mode,
                                          var_info->split_var_type, name);
      }
   } else {
      assert(var_info->levels[level].split);
      split->num_splits = var_info->levels[level].array_len;
      split->splits = rzalloc_array(mem_ctx, struct array_split,
                                    split->num_splits);
      for (unsigned i = 0; i < split->num_splits; i++) {
         create_split_array_vars(var_info, level + 1, &split->splits[i],
                                 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
                                 shader, impl, mem_ctx);
      }
   }
}

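/* Editor's note: a naming example for create_split_array_vars(), assuming a
 * hypothetical variable "foo" of type vec4[2][4] where level 0 is split and
 * level 1 is not: the recursion creates two variables named "(foo[0][*])"
 * and "(foo[1][*])", each of type vec4[4].
 */
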
static bool
split_var_list_arrays(nir_shader *shader,
                      nir_function_impl *impl,
                      struct exec_list *vars,
                      struct hash_table *var_info_map,
                      void *mem_ctx)
{
   struct exec_list split_vars;
   exec_list_make_empty(&split_vars);

   nir_foreach_variable_safe(var, vars) {
      struct array_var_info *info = get_array_var_info(var, var_info_map);
      if (!info)
         continue;

      bool has_split = false;
      const struct glsl_type *split_type =
         glsl_without_array_or_matrix(var->type);
      for (int i = info->num_levels - 1; i >= 0; i--) {
         if (info->levels[i].split) {
            has_split = true;
            continue;
         }

         /* If the original type was a matrix type, we'd like to keep that so
          * we don't convert matrices into arrays.
          */
         if (i == info->num_levels - 1 &&
             glsl_type_is_matrix(glsl_without_array(var->type))) {
            split_type = glsl_matrix_type(glsl_get_base_type(split_type),
                                          glsl_get_components(split_type),
                                          info->levels[i].array_len);
         } else {
            split_type = glsl_array_type(split_type, info->levels[i].array_len, 0);
         }
      }

      if (has_split) {
         info->split_var_type = split_type;
         /* To avoid list confusion (we'll be adding things as we split
          * variables), pull all of the variables we plan to split off of the
          * main variable list.
          */
         exec_node_remove(&var->node);
         exec_list_push_tail(&split_vars, &var->node);
      } else {
         assert(split_type == var->type);
         /* If we're not modifying this variable, delete the info so we skip
          * it faster in later passes.
          */
         _mesa_hash_table_remove_key(var_info_map, var);
      }
   }

   nir_foreach_variable(var, &split_vars) {
      struct array_var_info *info = get_array_var_info(var, var_info_map);
      create_split_array_vars(info, 0, &info->root_split, var->name,
                              shader, impl, mem_ctx);
   }

   return !exec_list_is_empty(&split_vars);
}

static bool
deref_has_split_wildcard(nir_deref_path *path,
                         struct array_var_info *info)
{
   if (info == NULL)
      return false;

   assert(path->path[0]->var == info->base_var);
   for (unsigned i = 0; i < info->num_levels; i++) {
      if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
          info->levels[i].split)
         return true;
   }

   return false;
}

static bool
array_path_is_out_of_bounds(nir_deref_path *path,
                            struct array_var_info *info)
{
   if (info == NULL)
      return false;

   assert(path->path[0]->var == info->base_var);
   for (unsigned i = 0; i < info->num_levels; i++) {
      nir_deref_instr *p = path->path[i + 1];
      if (p->deref_type == nir_deref_type_array_wildcard)
         continue;

      if (nir_src_is_const(p->arr.index) &&
          nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
         return true;
   }

   return false;
}

static void
emit_split_copies(nir_builder *b,
                  struct array_var_info *dst_info, nir_deref_path *dst_path,
                  unsigned dst_level, nir_deref_instr *dst,
                  struct array_var_info *src_info, nir_deref_path *src_path,
                  unsigned src_level, nir_deref_instr *src)
{
   nir_deref_instr *dst_p, *src_p;

   while ((dst_p = dst_path->path[dst_level + 1])) {
      if (dst_p->deref_type == nir_deref_type_array_wildcard)
         break;

      dst = nir_build_deref_follower(b, dst, dst_p);
      dst_level++;
   }

   while ((src_p = src_path->path[src_level + 1])) {
      if (src_p->deref_type == nir_deref_type_array_wildcard)
         break;

      src = nir_build_deref_follower(b, src, src_p);
      src_level++;
   }

   if (src_p == NULL || dst_p == NULL) {
      assert(src_p == NULL && dst_p == NULL);
      nir_copy_deref(b, dst, src);
   } else {
      assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
             src_p->deref_type == nir_deref_type_array_wildcard);

      if ((dst_info && dst_info->levels[dst_level].split) ||
          (src_info && src_info->levels[src_level].split)) {
         /* There are no indirects at this level on one of the source or the
          * destination so we are lowering it.
          */
         assert(glsl_get_length(dst_path->path[dst_level]->type) ==
                glsl_get_length(src_path->path[src_level]->type));
         unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
         for (unsigned i = 0; i < len; i++) {
            nir_ssa_def *idx = nir_imm_int(b, i);
            emit_split_copies(b, dst_info, dst_path, dst_level + 1,
                              nir_build_deref_array(b, dst, idx),
                              src_info, src_path, src_level + 1,
                              nir_build_deref_array(b, src, idx));
         }
      } else {
         /* Neither side is being split so we just keep going */
         emit_split_copies(b, dst_info, dst_path, dst_level + 1,
                           nir_build_deref_array_wildcard(b, dst),
                           src_info, src_path, src_level + 1,
                           nir_build_deref_array_wildcard(b, src));
      }
   }
}

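/* Editor's note: emit_split_copies() in effect unrolls wildcard copies when
 * either side is split at a given level. A copy such as
 *
 *    copy_deref (foo[*]), (bar[*])
 *
 * with a split level of length 2 becomes a copy of foo[0] from bar[0] and
 * a copy of foo[1] from bar[1]; wildcard levels that stay unsplit on both
 * sides are re-emitted as wildcard copies.
 */
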
static void
split_array_copies_impl(nir_function_impl *impl,
                        struct hash_table *var_info_map,
                        nir_variable_mode modes,
                        void *mem_ctx)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
         if (copy->intrinsic != nir_intrinsic_copy_deref)
            continue;

         nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
         nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);

         struct array_var_info *dst_info =
            get_array_deref_info(dst_deref, var_info_map, modes);
         struct array_var_info *src_info =
            get_array_deref_info(src_deref, var_info_map, modes);

         if (!src_info && !dst_info)
            continue;

         nir_deref_path dst_path, src_path;
         nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
         nir_deref_path_init(&src_path, src_deref, mem_ctx);

         if (!deref_has_split_wildcard(&dst_path, dst_info) &&
             !deref_has_split_wildcard(&src_path, src_info))
            continue;

         b.cursor = nir_instr_remove(&copy->instr);

         emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
                           src_info, &src_path, 0, src_path.path[0]);
      }
   }
}

static void
split_array_access_impl(nir_function_impl *impl,
                        struct hash_table *var_info_map,
                        nir_variable_mode modes,
                        void *mem_ctx)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_deref) {
            /* Clean up any dead derefs we find lying around. They may refer
             * to variables we're planning to split.
             */
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (deref->mode & modes)
               nir_deref_instr_remove_if_unused(deref);
            continue;
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic != nir_intrinsic_load_deref &&
             intrin->intrinsic != nir_intrinsic_store_deref &&
             intrin->intrinsic != nir_intrinsic_copy_deref)
            continue;

         const unsigned num_derefs =
            intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;

         for (unsigned d = 0; d < num_derefs; d++) {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);

            struct array_var_info *info =
               get_array_deref_info(deref, var_info_map, modes);
            if (!info)
               continue;

            nir_deref_path path;
            nir_deref_path_init(&path, deref, mem_ctx);

            b.cursor = nir_before_instr(&intrin->instr);

            if (array_path_is_out_of_bounds(&path, info)) {
               /* If one of the derefs is out-of-bounds, we just delete the
                * instruction. If a destination is out of bounds, then it may
                * have been in-bounds prior to shrinking so we don't want to
                * accidentally stomp something. However, we've already proven
                * that it will never be read so it's safe to delete. If a
                * source is out of bounds then it is loading random garbage.
                * For loads, we replace their uses with an undef instruction
                * and for copies we just delete the copy since it was writing
                * undefined garbage anyway and we may as well leave the random
                * garbage in the destination alone.
                */
               if (intrin->intrinsic == nir_intrinsic_load_deref) {
                  nir_ssa_def *u =
                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
                                   intrin->dest.ssa.bit_size);
                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                           nir_src_for_ssa(u));
               }
               nir_instr_remove(&intrin->instr);
               for (unsigned i = 0; i < num_derefs; i++)
                  nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
               break;
            }

            struct array_split *split = &info->root_split;
            for (unsigned i = 0; i < info->num_levels; i++) {
               if (info->levels[i].split) {
                  nir_deref_instr *p = path.path[i + 1];
                  unsigned index = nir_src_as_uint(p->arr.index);
                  assert(index < info->levels[i].array_len);
                  split = &split->splits[index];
               }
            }
            assert(!split->splits && split->var);

            nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
            for (unsigned i = 0; i < info->num_levels; i++) {
               if (!info->levels[i].split) {
                  new_deref = nir_build_deref_follower(&b, new_deref,
                                                       path.path[i + 1]);
               }
            }
            assert(new_deref->type == deref->type);

            /* Rewrite the deref source to point to the split one */
            nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
                                  nir_src_for_ssa(&new_deref->dest.ssa));
            nir_deref_instr_remove_if_unused(deref);
         }
      }
   }
}

/** A pass for splitting arrays of vectors into multiple variables
 *
 * This pass looks at arrays (possibly multiple levels) of vectors (not
 * structures or other types) and tries to split them into piles of variables,
 * one for each array element. The heuristic used is simple: If a given array
 * level is never used with an indirect, that array level will get split.
 *
 * This pass probably could handle structures easily enough, but making a pass
 * that could see through an array of structures of arrays would be difficult,
 * so it's best to just run nir_split_struct_vars first.
 */
bool
nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
{
   void *mem_ctx = ralloc_context(NULL);
   struct hash_table *var_info_map =
      _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
                              _mesa_key_pointer_equal);

   assert((modes & (nir_var_private | nir_var_function)) == modes);

   bool has_global_array = false;
   if (modes & nir_var_private) {
      has_global_array = init_var_list_array_infos(&shader->globals,
                                                   var_info_map, mem_ctx);
   }

   bool has_any_array = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool has_local_array = false;
      if (modes & nir_var_function) {
         has_local_array = init_var_list_array_infos(&function->impl->locals,
                                                     var_info_map, mem_ctx);
      }

      if (has_global_array || has_local_array) {
         has_any_array = true;
         mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
      }
   }

   /* If we failed to find any arrays of arrays, bail early. */
   if (!has_any_array) {
      ralloc_free(mem_ctx);
      return false;
   }

   bool has_global_splits = false;
   if (modes & nir_var_private) {
      has_global_splits = split_var_list_arrays(shader, NULL,
                                                &shader->globals,
                                                var_info_map, mem_ctx);
   }

   bool progress = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool has_local_splits = false;
      if (modes & nir_var_function) {
         has_local_splits = split_var_list_arrays(shader, function->impl,
                                                  &function->impl->locals,
                                                  var_info_map, mem_ctx);
      }

      if (has_global_splits || has_local_splits) {
         split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
         split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);

         nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                               nir_metadata_dominance);
         progress = true;
      }
   }

   ralloc_free(mem_ctx);

   return progress;
}

struct array_level_usage {
   unsigned array_len;

   /* The value UINT_MAX will be used to indicate an indirect */
   unsigned max_read;
   unsigned max_written;

   /* True if there is a copy that isn't to/from a shrinkable array */
   bool has_external_copy;
   struct set *levels_copied;
};

struct vec_var_usage {
   /* Convenience set of all components this variable has */
   nir_component_mask_t all_comps;

   nir_component_mask_t comps_read;
   nir_component_mask_t comps_written;

   nir_component_mask_t comps_kept;

   /* True if there is a copy that isn't to/from a shrinkable vector */
   bool has_external_copy;
   struct set *vars_copied;

   unsigned num_levels;
   struct array_level_usage levels[0];
};

static struct vec_var_usage *
get_vec_var_usage(nir_variable *var,
                  struct hash_table *var_usage_map,
                  bool add_usage_entry, void *mem_ctx)
{
   struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
   if (entry)
      return entry->data;

   if (!add_usage_entry)
      return NULL;

   /* Check to make sure that we are working with an array of vectors. We
    * don't bother to shrink single vectors because we figure that we can
    * clean it up better with SSA than by inserting piles of vecN instructions
    * to compact results.
    */
   int num_levels = num_array_levels_in_array_of_vector_type(var->type);
   if (num_levels < 1)
      return NULL; /* Not an array of vectors */

   struct vec_var_usage *usage =
      rzalloc_size(mem_ctx, sizeof(*usage) +
                            num_levels * sizeof(usage->levels[0]));

   usage->num_levels = num_levels;
   const struct glsl_type *type = var->type;
   for (unsigned i = 0; i < num_levels; i++) {
      usage->levels[i].array_len = glsl_get_length(type);
      assert(glsl_get_explicit_stride(type) == 0);
      type = glsl_get_array_element(type);
   }
   assert(glsl_type_is_vector_or_scalar(type));

   usage->all_comps = (1 << glsl_get_components(type)) - 1;

   _mesa_hash_table_insert(var_usage_map, var, usage);

   return usage;
}

static struct vec_var_usage *
get_vec_deref_usage(nir_deref_instr *deref,
                    struct hash_table *var_usage_map,
                    nir_variable_mode modes,
                    bool add_usage_entry, void *mem_ctx)
{
   if (!(deref->mode & modes))
      return NULL;

   return get_vec_var_usage(nir_deref_instr_get_variable(deref),
                            var_usage_map, add_usage_entry, mem_ctx);
}

static void
mark_deref_used(nir_deref_instr *deref,
                nir_component_mask_t comps_read,
                nir_component_mask_t comps_written,
                nir_deref_instr *copy_deref,
                struct hash_table *var_usage_map,
                nir_variable_mode modes,
                void *mem_ctx)
{
   if (!(deref->mode & modes))
      return;

   nir_variable *var = nir_deref_instr_get_variable(deref);

   struct vec_var_usage *usage =
      get_vec_var_usage(var, var_usage_map, true, mem_ctx);
   if (!usage)
      return;

   usage->comps_read |= comps_read & usage->all_comps;
   usage->comps_written |= comps_written & usage->all_comps;

   struct vec_var_usage *copy_usage = NULL;
   if (copy_deref) {
      copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
                                       true, mem_ctx);
      if (copy_usage) {
         if (usage->vars_copied == NULL) {
            usage->vars_copied = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
                                                  _mesa_key_pointer_equal);
         }
         _mesa_set_add(usage->vars_copied, copy_usage);
      } else {
         usage->has_external_copy = true;
      }
   }

   nir_deref_path path;
   nir_deref_path_init(&path, deref, mem_ctx);

   nir_deref_path copy_path;
   if (copy_usage)
      nir_deref_path_init(&copy_path, copy_deref, mem_ctx);

   unsigned copy_i = 0;
   for (unsigned i = 0; i < usage->num_levels; i++) {
      struct array_level_usage *level = &usage->levels[i];
      nir_deref_instr *deref = path.path[i + 1];
      assert(deref->deref_type == nir_deref_type_array ||
             deref->deref_type == nir_deref_type_array_wildcard);

      unsigned max_used;
      if (deref->deref_type == nir_deref_type_array) {
         max_used = nir_src_is_const(deref->arr.index) ?
                    nir_src_as_uint(deref->arr.index) : UINT_MAX;
      } else {
         /* For wildcards, we read or wrote the whole thing. */
         assert(deref->deref_type == nir_deref_type_array_wildcard);
         max_used = level->array_len - 1;

         if (copy_usage) {
            /* Match each wildcard level with the level on copy_usage */
            for (; copy_path.path[copy_i + 1]; copy_i++) {
               if (copy_path.path[copy_i + 1]->deref_type ==
                   nir_deref_type_array_wildcard)
                  break;
            }
            struct array_level_usage *copy_level =
               &copy_usage->levels[copy_i++];

            if (level->levels_copied == NULL) {
               level->levels_copied =
                  _mesa_set_create(mem_ctx, _mesa_hash_pointer,
                                   _mesa_key_pointer_equal);
            }
            _mesa_set_add(level->levels_copied, copy_level);
         } else {
            /* We have a wildcard and it comes from a variable we aren't
             * tracking; flag it and we'll know to not shorten this array.
             */
            level->has_external_copy = true;
         }
      }

      if (comps_written)
         level->max_written = MAX2(level->max_written, max_used);
      if (comps_read)
         level->max_read = MAX2(level->max_read, max_used);
   }
}

static bool
src_is_load_deref(nir_src src, nir_src deref_src)
{
   assert(src.is_ssa);
   assert(deref_src.is_ssa);

   if (src.ssa->parent_instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *load = nir_instr_as_intrinsic(src.ssa->parent_instr);
   if (load->intrinsic != nir_intrinsic_load_deref)
      return false;

   assert(load->src[0].is_ssa);

   return load->src[0].ssa == deref_src.ssa;
}

/* Returns all non-self-referential components of a store instruction. A
 * component is self-referential if it comes from the same component of a load
 * instruction on the same deref. If the only data in a particular component
 * of a variable came directly from that component then it's undefined. The
 * only way to get defined data into a component of a variable is for it to
 * get written there by something outside or from a different component.
 *
 * This is a fairly common pattern in shaders that come from either GLSL IR or
 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
 * load-vec-store.
 */
static nir_component_mask_t
get_non_self_referential_store_comps(nir_intrinsic_instr *store)
{
   nir_component_mask_t comps = nir_intrinsic_write_mask(store);

   assert(store->src[1].is_ssa);
   nir_instr *src_instr = store->src[1].ssa->parent_instr;
   if (src_instr->type != nir_instr_type_alu)
      return comps;

   nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);

   if (src_alu->op == nir_op_imov ||
       src_alu->op == nir_op_fmov) {
      /* If it's just a swizzle of a load from the same deref, discount any
       * channels that don't move in the swizzle.
       */
      if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
         for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
            if (src_alu->src[0].swizzle[i] == i)
               comps &= ~(1u << i);
         }
      }
   } else if (src_alu->op == nir_op_vec2 ||
              src_alu->op == nir_op_vec3 ||
              src_alu->op == nir_op_vec4) {
      /* If it's a vec, discount any channels that are just loads from the
       * same deref put in the same spot.
       */
      for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
         if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
             src_alu->src[i].swizzle[0] == i)
            comps &= ~(1u << i);
      }
   }

   return comps;
}

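/* Editor's note: a sketch (approximated IR syntax, not verbatim NIR print
 * output) of the load-vec-store pattern the helper above is matching. GLSL
 * such as "v.x = f;" typically reaches NIR as
 *
 *    %old = load_deref v
 *    %new = vec4 f, %old.y, %old.z, %old.w
 *    store_deref v, %new (writemask xyzw)
 *
 * Components y, z and w merely copy themselves back, so only x is counted
 * as really written.
 */
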
static void
find_used_components_impl(nir_function_impl *impl,
                          struct hash_table *var_usage_map,
                          nir_variable_mode modes,
                          void *mem_ctx)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref:
            mark_deref_used(nir_src_as_deref(intrin->src[0]),
                            nir_ssa_def_components_read(&intrin->dest.ssa), 0,
                            NULL, var_usage_map, modes, mem_ctx);
            break;

         case nir_intrinsic_store_deref:
            mark_deref_used(nir_src_as_deref(intrin->src[0]),
                            0, get_non_self_referential_store_comps(intrin),
                            NULL, var_usage_map, modes, mem_ctx);
            break;

         case nir_intrinsic_copy_deref: {
            /* Just mark everything used for copies. */
            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
            nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
            mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
            mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
            break;
         }

         default:
            break;
         }
      }
   }
}

static bool
shrink_vec_var_list(struct exec_list *vars,
                    struct hash_table *var_usage_map)
{
   /* Initialize the components kept field of each variable. This is the
    * AND of the components written and components read. If a component is
    * written but never read, it's dead. If it is read but never written,
    * then all values read are undefined garbage and we may as well not read
    * them.
    *
    * The same logic applies to the array length. We make the array length
    * the minimum required length between read and write and plan to discard
    * any OOB access. The one exception here is indirect writes because we
    * don't know where they will land and we can't shrink an array with
    * indirect writes because previously in-bounds writes may become
    * out-of-bounds and have undefined behavior.
    *
    * Also, if we have a copy to/from something we can't shrink, we need to
    * leave components and array_len of any wildcards alone.
    */
   nir_foreach_variable(var, vars) {
      struct vec_var_usage *usage =
         get_vec_var_usage(var, var_usage_map, false, NULL);
      if (!usage)
         continue;

      assert(usage->comps_kept == 0);
      if (usage->has_external_copy)
         usage->comps_kept = usage->all_comps;
      else
         usage->comps_kept = usage->comps_read & usage->comps_written;

      for (unsigned i = 0; i < usage->num_levels; i++) {
         struct array_level_usage *level = &usage->levels[i];
         assert(level->array_len > 0);

         if (level->max_written == UINT_MAX || level->has_external_copy)
            continue; /* Can't shrink */

         unsigned max_used = MIN2(level->max_read, level->max_written);
         level->array_len = MIN2(max_used, level->array_len - 1) + 1;
      }
   }

   /* In order for variable copies to work, we have to have the same data type
    * on the source and the destination. In order to satisfy this, we run a
    * little fixed-point algorithm to transitively ensure that we get enough
    * components and array elements for this to hold for all copies.
    */
   bool fp_progress;
   do {
      fp_progress = false;
      nir_foreach_variable(var, vars) {
         struct vec_var_usage *var_usage =
            get_vec_var_usage(var, var_usage_map, false, NULL);
         if (!var_usage || !var_usage->vars_copied)
            continue;

         set_foreach(var_usage->vars_copied, copy_entry) {
            struct vec_var_usage *copy_usage = (void *)copy_entry->key;
            if (copy_usage->comps_kept != var_usage->comps_kept) {
               nir_component_mask_t comps_kept =
                  (var_usage->comps_kept | copy_usage->comps_kept);
               var_usage->comps_kept = comps_kept;
               copy_usage->comps_kept = comps_kept;
               fp_progress = true;
            }
         }

         for (unsigned i = 0; i < var_usage->num_levels; i++) {
            struct array_level_usage *var_level = &var_usage->levels[i];
            if (!var_level->levels_copied)
               continue;

            set_foreach(var_level->levels_copied, copy_entry) {
               struct array_level_usage *copy_level = (void *)copy_entry->key;
               if (var_level->array_len != copy_level->array_len) {
                  unsigned array_len =
                     MAX2(var_level->array_len, copy_level->array_len);
                  var_level->array_len = array_len;
                  copy_level->array_len = array_len;
                  fp_progress = true;
               }
            }
         }
      }
   } while (fp_progress);

   bool vars_shrunk = false;
   nir_foreach_variable_safe(var, vars) {
      struct vec_var_usage *usage =
         get_vec_var_usage(var, var_usage_map, false, NULL);
      if (!usage)
         continue;

      bool shrunk = false;
      const struct glsl_type *vec_type = var->type;
      for (unsigned i = 0; i < usage->num_levels; i++) {
         /* If we've reduced the array to zero elements at some level, just
          * set comps_kept to 0 and delete the variable.
          */
         if (usage->levels[i].array_len == 0) {
            usage->comps_kept = 0;
            break;
         }

         assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
         if (usage->levels[i].array_len < glsl_get_length(vec_type))
            shrunk = true;
         vec_type = glsl_get_array_element(vec_type);
      }
      assert(glsl_type_is_vector_or_scalar(vec_type));

      assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
      if (usage->comps_kept != usage->all_comps)
         shrunk = true;

      if (usage->comps_kept == 0) {
         /* This variable is dead, remove it */
         vars_shrunk = true;
         exec_node_remove(&var->node);
         continue;
      }

      if (!shrunk) {
         /* This variable doesn't need to be shrunk. Remove it from the
          * hash table so later steps will ignore it.
          */
         _mesa_hash_table_remove_key(var_usage_map, var);
         continue;
      }

      /* Build the new var type */
      unsigned new_num_comps = util_bitcount(usage->comps_kept);
      const struct glsl_type *new_type =
         glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
      for (int i = usage->num_levels - 1; i >= 0; i--) {
         assert(usage->levels[i].array_len > 0);
         /* If the original type was a matrix type, we'd like to keep that so
          * we don't convert matrices into arrays.
          */
         if (i == usage->num_levels - 1 &&
             glsl_type_is_matrix(glsl_without_array(var->type)) &&
             new_num_comps > 1 && usage->levels[i].array_len > 1) {
            new_type = glsl_matrix_type(glsl_get_base_type(new_type),
                                        new_num_comps,
                                        usage->levels[i].array_len);
         } else {
            new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0);
         }
      }
      var->type = new_type;

      vars_shrunk = true;
   }

   return vars_shrunk;
}

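/* Editor's note: the fixed-point step above matters for chains of copies.
 * If a is copied to b and b to c, their comps_kept masks are repeatedly
 * unioned (and copied array levels grown to the larger array_len) until
 * nothing changes, so every copy ends up with matching source and
 * destination types.
 */
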
static bool
vec_deref_is_oob(nir_deref_instr *deref,
                 struct vec_var_usage *usage)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   bool oob = false;
   for (unsigned i = 0; i < usage->num_levels; i++) {
      nir_deref_instr *p = path.path[i + 1];
      if (p->deref_type == nir_deref_type_array_wildcard)
         continue;

      if (nir_src_is_const(p->arr.index) &&
          nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) {
         oob = true;
         break;
      }
   }

   nir_deref_path_finish(&path);

   return oob;
}

static bool
vec_deref_is_dead_or_oob(nir_deref_instr *deref,
                         struct hash_table *var_usage_map,
                         nir_variable_mode modes)
{
   struct vec_var_usage *usage =
      get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
   if (!usage)
      return false;

   return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
}

static void
shrink_vec_var_access_impl(nir_function_impl *impl,
                           struct hash_table *var_usage_map,
                           nir_variable_mode modes)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (!(deref->mode & modes))
               break;

            /* Clean up any dead derefs we find lying around. They may refer
             * to variables we've deleted.
             */
            if (nir_deref_instr_remove_if_unused(deref))
               break;

            /* Update the type in the deref to keep the types consistent as
             * you walk down the chain. We don't need to check if this is one
             * of the derefs we're shrinking because this is a no-op if it
             * isn't. The worst that could happen is that we accidentally fix
             * an invalid deref.
             */
            if (deref->deref_type == nir_deref_type_var) {
               deref->type = deref->var->type;
            } else if (deref->deref_type == nir_deref_type_array ||
                       deref->deref_type == nir_deref_type_array_wildcard) {
               nir_deref_instr *parent = nir_deref_instr_parent(deref);
               assert(glsl_type_is_array(parent->type) ||
                      glsl_type_is_matrix(parent->type));
               deref->type = glsl_get_array_element(parent->type);
            }
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            /* If we have a copy whose source or destination has been deleted
             * because we determined the variable was dead, then we just
             * delete the copy instruction. If the source variable was dead
             * then it was writing undefined garbage anyway and if it's the
             * destination variable that's dead then the write isn't needed.
             */
            if (intrin->intrinsic == nir_intrinsic_copy_deref) {
               nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
               nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
               if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
                   vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
                  nir_instr_remove(&intrin->instr);
                  nir_deref_instr_remove_if_unused(dst);
                  nir_deref_instr_remove_if_unused(src);
               }
               continue;
            }

            if (intrin->intrinsic != nir_intrinsic_load_deref &&
                intrin->intrinsic != nir_intrinsic_store_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!(deref->mode & modes))
               continue;

            struct vec_var_usage *usage =
               get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
            if (!usage)
               continue;

            if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
               if (intrin->intrinsic == nir_intrinsic_load_deref) {
                  nir_ssa_def *u =
                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
                                   intrin->dest.ssa.bit_size);
                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                           nir_src_for_ssa(u));
               }
               nir_instr_remove(&intrin->instr);
               nir_deref_instr_remove_if_unused(deref);
               continue;
            }

            if (intrin->intrinsic == nir_intrinsic_load_deref) {
               b.cursor = nir_after_instr(&intrin->instr);

               nir_ssa_def *undef =
                  nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size);
               nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
               unsigned c = 0;
               for (unsigned i = 0; i < intrin->num_components; i++) {
                  if (usage->comps_kept & (1u << i))
                     vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++);
                  else
                     vec_srcs[i] = undef;
               }
               nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);

               nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
                                              nir_src_for_ssa(vec),
                                              vec->parent_instr);

               /* The SSA def is now only used by the swizzle. It's safe to
                * shrink the number of components.
                */
               assert(list_length(&intrin->dest.ssa.uses) == c);
               intrin->num_components = c;
               intrin->dest.ssa.num_components = c;
            } else {
               nir_component_mask_t write_mask =
                  nir_intrinsic_write_mask(intrin);

               unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
               nir_component_mask_t new_write_mask = 0;
               unsigned c = 0;
               for (unsigned i = 0; i < intrin->num_components; i++) {
                  if (usage->comps_kept & (1u << i)) {
                     swizzle[c] = i;
                     if (write_mask & (1u << i))
                        new_write_mask |= 1u << c;
                     c++;
                  }
               }

               b.cursor = nir_before_instr(&intrin->instr);

               nir_ssa_def *swizzled =
                  nir_swizzle(&b, intrin->src[1].ssa, swizzle, c, false);

               /* Rewrite to use the compacted source */
               nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
                                     nir_src_for_ssa(swizzled));
               nir_intrinsic_set_write_mask(intrin, new_write_mask);
               intrin->num_components = c;
            }
            break;
         }

         default:
            break;
         }
      }
   }
}

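/* Editor's note: in the function above, a load whose variable lost
 * components is shrunk and the original-width vector is rebuilt with undefs
 * in the dropped channels; a store has its value swizzled down to the kept
 * channels and its write mask compacted to match.
 */
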
static bool
function_impl_has_vars_with_modes(nir_function_impl *impl,
                                  nir_variable_mode modes)
{
   nir_shader *shader = impl->function->shader;

   if ((modes & nir_var_private) && !exec_list_is_empty(&shader->globals))
      return true;

   if ((modes & nir_var_function) && !exec_list_is_empty(&impl->locals))
      return true;

   return false;
}

/** Attempt to shrink arrays of vectors
 *
 * This pass looks at variables which contain a vector or an array (possibly
 * multiple dimensions) of vectors and attempts to lower to a smaller vector
 * or array. If the pass can prove that a component of a vector (or array of
 * vectors) is never really used, then that component will be removed.
 * Similarly, the pass attempts to shorten arrays based on what elements it
 * can prove are never read or never contain valid data.
 */
bool
nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
{
   assert((modes & (nir_var_private | nir_var_function)) == modes);

   void *mem_ctx = ralloc_context(NULL);

   struct hash_table *var_usage_map =
      _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
                              _mesa_key_pointer_equal);

   bool has_vars_to_shrink = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      /* Don't even bother crawling the IR if we don't have any variables.
       * Given that this pass deletes any unused variables, it's likely that
       * we will be in this scenario eventually.
       */
      if (function_impl_has_vars_with_modes(function->impl, modes)) {
         has_vars_to_shrink = true;
         find_used_components_impl(function->impl, var_usage_map,
                                   modes, mem_ctx);
      }
   }
   if (!has_vars_to_shrink) {
      ralloc_free(mem_ctx);
      return false;
   }

   bool globals_shrunk = false;
   if (modes & nir_var_private)
      globals_shrunk = shrink_vec_var_list(&shader->globals, var_usage_map);

   bool progress = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool locals_shrunk = false;
      if (modes & nir_var_function) {
         locals_shrunk = shrink_vec_var_list(&function->impl->locals,
                                             var_usage_map);
      }

      if (globals_shrunk || locals_shrunk) {
         shrink_vec_var_access_impl(function->impl, var_usage_map, modes);

         nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                               nir_metadata_dominance);
         progress = true;
      }
   }

   ralloc_free(mem_ctx);

   return progress;
}