mesa/src/compiler/nir/nir_opt_find_array_copies.c

/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

struct match_node {
   /* Note: these fields are only valid for leaf nodes */

   unsigned next_array_idx;
   int src_wildcard_idx;
   nir_deref_path first_src_path;

   /* The index of the first read of the source path that's part of the copy
    * we're matching. If the last write to the source path is after this, we
    * would get a different result from reading it at the end and we can't
    * emit the copy.
    */
   unsigned first_src_read;

   /* The last time there was a write to this node. */
   unsigned last_overwritten;

   /* The last time there was a write to this node which successfully advanced
    * next_array_idx. This helps us catch any intervening aliased writes.
    */
   unsigned last_successful_write;

   unsigned num_children;
   struct match_node *children[];
};

struct match_state {
   /* Map from nir_variable * -> match_node */
   struct hash_table *table;

   unsigned cur_instr;

   nir_builder builder;

   void *dead_ctx;
};

static struct match_node *
create_match_node(const struct glsl_type *type, struct match_state *state)
{
   unsigned num_children = 0;
   if (glsl_type_is_array_or_matrix(type)) {
      /* One for wildcards */
      num_children = glsl_get_length(type) + 1;
   } else if (glsl_type_is_struct_or_ifc(type)) {
      num_children = glsl_get_length(type);
   }

   struct match_node *node = rzalloc_size(state->dead_ctx,
                                          sizeof(struct match_node) +
                                          num_children * sizeof(struct match_node *));
   node->num_children = num_children;
   node->src_wildcard_idx = -1;
   node->first_src_read = UINT32_MAX;
   return node;
}

static struct match_node *
node_for_deref(nir_deref_instr *instr, struct match_node *parent,
               struct match_state *state)
{
   unsigned idx;
   switch (instr->deref_type) {
   case nir_deref_type_var: {
      struct hash_entry *entry = _mesa_hash_table_search(state->table, instr->var);
      if (entry) {
         return entry->data;
      } else {
         struct match_node *node = create_match_node(instr->type, state);
         _mesa_hash_table_insert(state->table, instr->var, node);
         return node;
      }
   }

   case nir_deref_type_array_wildcard:
      idx = parent->num_children - 1;
      break;

   case nir_deref_type_array:
      if (nir_src_is_const(instr->arr.index)) {
         idx = nir_src_as_uint(instr->arr.index);
         assert(idx < parent->num_children - 1);
      } else {
         idx = parent->num_children - 1;
      }
      break;

   case nir_deref_type_struct:
      idx = instr->strct.index;
      break;

   default:
      unreachable("bad deref type");
   }

   assert(idx < parent->num_children);
   if (parent->children[idx]) {
      return parent->children[idx];
   } else {
      struct match_node *node = create_match_node(instr->type, state);
      parent->children[idx] = node;
      return node;
   }
}

static struct match_node *
node_for_wildcard(const struct glsl_type *type, struct match_node *parent,
                  struct match_state *state)
{
   assert(glsl_type_is_array_or_matrix(type));
   unsigned idx = glsl_get_length(type);

   if (parent->children[idx]) {
      return parent->children[idx];
   } else {
      struct match_node *node =
         create_match_node(glsl_get_array_element(type), state);
      parent->children[idx] = node;
      return node;
   }
}

static struct match_node *
node_for_path(nir_deref_path *path, struct match_state *state)
{
   struct match_node *node = NULL;
   for (nir_deref_instr **instr = path->path; *instr; instr++)
      node = node_for_deref(*instr, node, state);

   return node;
}

static struct match_node *
node_for_path_with_wildcard(nir_deref_path *path, unsigned wildcard_idx,
                            struct match_state *state)
{
   struct match_node *node = NULL;
   unsigned idx = 0;
   for (nir_deref_instr **instr = path->path; *instr; instr++, idx++) {
      if (idx == wildcard_idx)
         node = node_for_wildcard((*(instr - 1))->type, node, state);
      else
         node = node_for_deref(*instr, node, state);
   }

   return node;
}

typedef void (*match_cb)(struct match_node *, struct match_state *);

static void
_foreach_aliasing(nir_deref_instr **deref, match_cb cb,
                  struct match_node *node, struct match_state *state)
{
   if (*deref == NULL) {
      cb(node, state);
      return;
   }

   switch ((*deref)->deref_type) {
   case nir_deref_type_struct: {
      struct match_node *child = node->children[(*deref)->strct.index];
      if (child)
         _foreach_aliasing(deref + 1, cb, child, state);
      return;
   }

   case nir_deref_type_array:
   case nir_deref_type_array_wildcard: {
      if ((*deref)->deref_type == nir_deref_type_array_wildcard ||
          !nir_src_is_const((*deref)->arr.index)) {
         /* This access may touch any index, so we have to visit all of
          * them.
          */
         for (unsigned i = 0; i < node->num_children; i++) {
            if (node->children[i])
               _foreach_aliasing(deref + 1, cb, node->children[i], state);
         }
      } else {
         /* Visit the wildcard entry if any */
         if (node->children[node->num_children - 1]) {
            _foreach_aliasing(deref + 1, cb,
                              node->children[node->num_children - 1], state);
         }

         unsigned index = nir_src_as_uint((*deref)->arr.index);
         /* Check that the index is in-bounds */
         if (index < node->num_children - 1 && node->children[index])
            _foreach_aliasing(deref + 1, cb, node->children[index], state);
      }
      return;
   }

   default:
      unreachable("bad deref type");
   }
}

/* Given a deref path, find all the leaf deref nodes that alias it. */

static void
foreach_aliasing_node(nir_deref_path *path,
                      match_cb cb,
                      struct match_state *state)
{
   assert(path->path[0]->deref_type == nir_deref_type_var);
   struct hash_entry *entry = _mesa_hash_table_search(state->table,
                                                      path->path[0]->var);
   if (entry)
      _foreach_aliasing(&path->path[1], cb, entry->data, state);
}

static nir_deref_instr *
build_wildcard_deref(nir_builder *b, nir_deref_path *path,
                     unsigned wildcard_idx)
{
   assert(path->path[wildcard_idx]->deref_type == nir_deref_type_array);

   nir_deref_instr *tail =
      nir_build_deref_array_wildcard(b, path->path[wildcard_idx - 1]);

   for (unsigned i = wildcard_idx + 1; path->path[i]; i++)
      tail = nir_build_deref_follower(b, tail, path->path[i]);

   return tail;
}

static void
clobber(struct match_node *node, struct match_state *state)
{
   node->last_overwritten = state->cur_instr;
}

static bool
try_match_deref(nir_deref_path *base_path, int *path_array_idx,
                nir_deref_path *deref_path, int arr_idx)
{
   for (int i = 0; ; i++) {
      nir_deref_instr *b = base_path->path[i];
      nir_deref_instr *d = deref_path->path[i];
      /* They have to be the same length */
      if ((b == NULL) != (d == NULL))
         return false;

      if (b == NULL)
         break;

      /* This can happen if one is a deref_array and the other a wildcard */
      if (b->deref_type != d->deref_type)
         return false;;

      switch (b->deref_type) {
      case nir_deref_type_var:
         if (b->var != d->var)
            return false;
         continue;

      case nir_deref_type_array:
         assert(b->arr.index.is_ssa && d->arr.index.is_ssa);
         const bool const_b_idx = nir_src_is_const(b->arr.index);
         const bool const_d_idx = nir_src_is_const(d->arr.index);
         const unsigned b_idx = const_b_idx ? nir_src_as_uint(b->arr.index) : 0;
         const unsigned d_idx = const_d_idx ? nir_src_as_uint(d->arr.index) : 0;

         /* If we don't have an index into the path yet or if this entry in
          * the path is at the array index, see if this is a candidate.  We're
          * looking for an index which is zero in the base deref and arr_idx
          * in the search deref.
          */
         if ((*path_array_idx < 0 || *path_array_idx == i) &&
             const_b_idx && b_idx == 0 &&
             const_d_idx && d_idx == arr_idx) {
            *path_array_idx = i;
            continue;
         }

         /* We're at the array index but not a candidate */
         if (*path_array_idx == i)
            return false;

         /* If we're not the path array index, we must match exactly.  We
          * could probably just compare SSA values and trust in copy
          * propagation but doing it ourselves means this pass can run a bit
          * earlier.
          */
         if (b->arr.index.ssa == d->arr.index.ssa ||
             (const_b_idx && const_d_idx && b_idx == d_idx))
            continue;

         return false;

      case nir_deref_type_array_wildcard:
         continue;

      case nir_deref_type_struct:
         if (b->strct.index != d->strct.index)
            return false;
         continue;

      default:
         unreachable("Invalid deref type in a path");
      }
   }

   /* If we got here without failing, we've matched.  However, it isn't an
    * array match unless we found an altered array index.
    */
   return *path_array_idx > 0;
}

static void
handle_read(nir_deref_instr *src, struct match_state *state)
{
   /* We only need to create an entry for sources that might be used to form
    * an array copy. Hence no indirects or indexing into a vector.
    */
   if (nir_deref_instr_has_indirect(src) ||
       nir_deref_instr_is_known_out_of_bounds(src) ||
       (src->deref_type == nir_deref_type_array &&
        glsl_type_is_vector(nir_src_as_deref(src->parent)->type)))
      return;

   nir_deref_path src_path;
   nir_deref_path_init(&src_path, src, state->dead_ctx);

   /* Create a node for this source if it doesn't exist. The point of this is
    * to know which nodes aliasing a given store we actually need to care
    * about, to avoid creating an excessive amount of nodes.
    */
   node_for_path(&src_path, state);
}

/* The core implementation, which is used for both copies and writes. Return
 * true if a copy is created.
 */
static bool
handle_write(nir_deref_instr *dst, nir_deref_instr *src,
             unsigned write_index, unsigned read_index,
             struct match_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_path dst_path;
   nir_deref_path_init(&dst_path, dst, state->dead_ctx);

   unsigned idx = 0;
   for (nir_deref_instr **instr = dst_path.path; *instr; instr++, idx++) {
      if ((*instr)->deref_type != nir_deref_type_array)
         continue;

      /* Get the entry where the index is replaced by a wildcard, so that we
       * hopefully can keep matching an array copy.
       */
      struct match_node *dst_node =
         node_for_path_with_wildcard(&dst_path, idx, state);

      if (!src)
         goto reset;

      if (nir_src_as_uint((*instr)->arr.index) != dst_node->next_array_idx)
         goto reset;

      if (dst_node->next_array_idx == 0) {
         /* At this point there may be multiple source indices which are zero,
          * so we can't pin down the actual source index. Just store it and
          * move on.
          */
         nir_deref_path_init(&dst_node->first_src_path, src, state->dead_ctx);
      } else {
         nir_deref_path src_path;
         nir_deref_path_init(&src_path, src, state->dead_ctx);
         bool result = try_match_deref(&dst_node->first_src_path,
                                       &dst_node->src_wildcard_idx,
                                       &src_path, dst_node->next_array_idx);
         nir_deref_path_finish(&src_path);
         if (!result)
            goto reset;
      }

      /* Check if an aliasing write clobbered the array after the last normal
       * write. For example, with a sequence like this:
       *
       * dst[0][*] = src[0][*];
       * dst[0][0] = 0; // invalidates the array copy dst[*][*] = src[*][*]
       * dst[1][*] = src[1][*];
       *
       * Note that the second write wouldn't reset the entry for dst[*][*]
       * by itself, but it'll be caught by this check when processing the
       * third copy.
       */
      if (dst_node->last_successful_write < dst_node->last_overwritten)
         goto reset;

      dst_node->last_successful_write = write_index;

      /* In this case we've successfully processed an array element. Check if
       * this is the last, so that we can emit an array copy.
       */
      dst_node->next_array_idx++;
      dst_node->first_src_read = MIN2(dst_node->first_src_read, read_index);
      if (dst_node->next_array_idx > 1 &&
          dst_node->next_array_idx == glsl_get_length((*(instr - 1))->type)) {
         /* Make sure that nothing was overwritten. */
         struct match_node *src_node =
            node_for_path_with_wildcard(&dst_node->first_src_path,
                                        dst_node->src_wildcard_idx,
                                        state);

         if (src_node->last_overwritten <= dst_node->first_src_read) {
            nir_copy_deref(b, build_wildcard_deref(b, &dst_path, idx),
                              build_wildcard_deref(b, &dst_node->first_src_path,
                                                   dst_node->src_wildcard_idx));
            foreach_aliasing_node(&dst_path, clobber, state);
            return true;
         }
      } else {
         continue;
      }

reset:
      dst_node->next_array_idx = 0;
      dst_node->src_wildcard_idx = -1;
      dst_node->last_successful_write = 0;
      dst_node->first_src_read = UINT32_MAX;
   }

   /* Mark everything aliasing dst_path as clobbered. This needs to happen
    * last since in the loop above we need to know what last clobbered
    * dst_node and this overwrites that.
    */
   foreach_aliasing_node(&dst_path, clobber, state);

   return false;
}

static bool
opt_find_array_copies_block(nir_builder *b, nir_block *block,
                            struct match_state *state)
{
   bool progress = false;

   unsigned next_index = 0;

   _mesa_hash_table_clear(state->table, NULL);

   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      /* Index the instructions before we do anything else. */
      instr->index = next_index++;

      /* Save the index of this instruction */
      state->cur_instr = instr->index;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if (intrin->intrinsic == nir_intrinsic_load_deref) {
         handle_read(nir_src_as_deref(intrin->src[0]), state);
         continue;
      }

      if (intrin->intrinsic != nir_intrinsic_copy_deref &&
          intrin->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *dst_deref = nir_src_as_deref(intrin->src[0]);

      /* The destination must be local.  If we see a non-local store, we
       * continue on because it won't affect local stores or read-only
       * variables.
       */
      if (dst_deref->mode != nir_var_function_temp)
         continue;

      /* If there are any known out-of-bounds writes, then we can just skip
       * this write as it's undefined and won't contribute to building up an
       * array copy anyways.
       */
      if (nir_deref_instr_is_known_out_of_bounds(dst_deref))
         continue;

      nir_deref_instr *src_deref;
      unsigned load_index = 0;
      if (intrin->intrinsic == nir_intrinsic_copy_deref) {
         src_deref = nir_src_as_deref(intrin->src[1]);
         load_index = intrin->instr.index;
      } else {
         assert(intrin->intrinsic == nir_intrinsic_store_deref);
         nir_intrinsic_instr *load = nir_src_as_intrinsic(intrin->src[1]);
         if (load == NULL || load->intrinsic != nir_intrinsic_load_deref) {
            src_deref = NULL;
         } else {
            src_deref = nir_src_as_deref(load->src[0]);
            load_index = load->instr.index;
         }

         if (nir_intrinsic_write_mask(intrin) !=
             (1 << glsl_get_components(dst_deref->type)) - 1) {
            src_deref = NULL;
         }
      }

      /* The source must be either local or something that's guaranteed to be
       * read-only.
       */
      const nir_variable_mode read_only_modes =
         nir_var_shader_in | nir_var_uniform | nir_var_system_value;
      if (src_deref &&
          !(src_deref->mode & (nir_var_function_temp | read_only_modes))) {
         src_deref = NULL;
      }

      /* There must be no indirects in the source or destination and no known
       * out-of-bounds accesses in the source, and the copy must be fully
       * qualified, or else we can't build up the array copy. We handled
       * out-of-bounds accesses to the dest above. The types must match, since
       * copy_deref currently can't bitcast mismatched deref types.
       */
      if (src_deref &&
          (nir_deref_instr_has_indirect(src_deref) ||
           nir_deref_instr_is_known_out_of_bounds(src_deref) ||
           nir_deref_instr_has_indirect(dst_deref) ||
           !glsl_type_is_vector_or_scalar(src_deref->type) ||
           glsl_get_bare_type(src_deref->type) !=
           glsl_get_bare_type(dst_deref->type))) {
         src_deref = NULL;
      }

      state->builder.cursor = nir_after_instr(instr);
      progress |= handle_write(dst_deref, src_deref, instr->index,
                               load_index, state);
   }

   return progress;
}

static bool
opt_find_array_copies_impl(nir_function_impl *impl)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   bool progress = false;

   struct match_state s;
   s.dead_ctx = ralloc_context(NULL);
   s.table = _mesa_pointer_hash_table_create(s.dead_ctx);
   nir_builder_init(&s.builder, impl);

   nir_foreach_block(block, impl) {
      if (opt_find_array_copies_block(&b, block, &s))
         progress = true;
   }

   ralloc_free(s.dead_ctx);

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   return progress;
}

/**
 * This peephole optimization looks for a series of load/store_deref or
 * copy_deref instructions that copy an array from one variable to another and
 * turns it into a copy_deref that copies the entire array.  The pattern it
 * looks for is extremely specific but it's good enough to pick up on the
 * input array copies in DXVK and should also be able to pick up the sequence
 * generated by spirv_to_nir for a OpLoad of a large composite followed by
 * OpStore.
 *
 * TODO: Support out-of-order copies.
 */
bool
nir_opt_find_array_copies(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl && opt_find_array_copies_impl(function->impl))
         progress = true;
   }

   return progress;
}