mesa/src/compiler/nir/nir_opt_rematerialize_compares.c

/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir/nir_builder.h"
#include "nir.h"
#include "nir_constant_expressions.h"
#include "nir_control_flow.h"
#include "nir_loop_analyze.h"

/**
 * Report whether \p instr is one of the two-source comparison opcodes that
 * this pass is willing to rematerialize.
 *
 * Both the unsuffixed and the "32"-suffixed variant of each comparison are
 * accepted.  Any opcode not listed here yields false.
 */
static bool
is_two_src_comparison(const nir_alu_instr *instr)
{
   bool is_compare = false;

   switch (instr->op) {
   /* f-prefixed comparisons */
   case nir_op_feq:
   case nir_op_feq32:
   case nir_op_fneu:
   case nir_op_fneu32:
   case nir_op_flt:
   case nir_op_flt32:
   case nir_op_fge:
   case nir_op_fge32:
   /* i-prefixed comparisons */
   case nir_op_ieq:
   case nir_op_ieq32:
   case nir_op_ine:
   case nir_op_ine32:
   case nir_op_ilt:
   case nir_op_ilt32:
   case nir_op_ige:
   case nir_op_ige32:
   /* u-prefixed comparisons */
   case nir_op_ult:
   case nir_op_ult32:
   case nir_op_uge:
   case nir_op_uge32:
      is_compare = true;
      break;
   default:
      break;
   }

   return is_compare;
}

/**
 * Test whether source \p src of \p instr is a constant whose selected
 * components are all zero.
 *
 * \param instr           ALU instruction owning the source.
 * \param src             Index of the source to inspect.
 * \param num_components  Number of entries of \p swizzle to examine.
 * \param swizzle         Component indices to read from the constant.
 *
 * \return false if the source is not constant, if any examined component is
 *         non-zero, or if a component is examined while the opcode's
 *         declared input base type is neither int, uint nor float; true
 *         otherwise.
 */
static inline bool
is_zero(const nir_alu_instr *instr, unsigned src, unsigned num_components,
        const uint8_t *swizzle)
{
   /* Non-constant sources can never be proven zero here. */
   if (!nir_src_is_const(instr->src[src].src))
      return false;

   /* The declared input type depends only on the opcode and source index,
    * so look it up once instead of once per component.
    */
   const nir_alu_type base_type =
      nir_alu_type_get_base_type(nir_op_infos[instr->op].input_types[src]);

   for (unsigned c = 0; c < num_components; c++) {
      if (base_type == nir_type_float) {
         if (nir_src_comp_as_float(instr->src[src].src, swizzle[c]) != 0)
            return false;
      } else if (base_type == nir_type_int || base_type == nir_type_uint) {
         if (nir_src_comp_as_int(instr->src[src].src, swizzle[c]) != 0)
            return false;
      } else {
         /* Any other base type is not handled. */
         return false;
      }
   }

   return true;
}

/**
 * Check that every instruction use of \p instr is the condition operand of a
 * nir_op_bcsel or nir_op_b32csel.
 *
 * Uses by if-statements are not examined here.
 *
 * \return true when every instruction use is the first source (the condition)
 *         of a select; false as soon as one use is found in a non-ALU
 *         instruction, in a non-select ALU instruction, or in a select's
 *         data operand.
 */
static bool
all_uses_are_bcsel(const nir_alu_instr *instr)
{
   nir_foreach_use(use, &instr->def) {
      if (nir_src_parent_instr(use)->type != nir_instr_type_alu)
         return false;

      nir_alu_instr *const alu = nir_instr_as_alu(nir_src_parent_instr(use));
      if (alu->op != nir_op_bcsel &&
          alu->op != nir_op_b32csel)
         return false;

      /* Not only must the result be used by a bcsel, but this particular
       * use must be the first source (the condition).
       *
       * Compare the use itself rather than the value held in src[0]:
       * checking only that src[0] refers to this def would also accept a
       * select such as bcsel(c, c, x), where the def is additionally a data
       * operand.  Besides contradicting the intent above, such a select owns
       * two entries in the def's use list; the rematerialization loop in
       * this file rewrites every matching source of a user while walking
       * that list with a _safe iterator, which only tolerates removal of
       * the entry currently being visited.
       */
      if (use != &alu->src[0].src)
         return false;
   }

   return true;
}

/**
 * Check that every instruction use of \p instr is a two-source comparison
 * against a constant zero whose own result passes all_uses_are_bcsel().
 *
 * Only the first swizzle component of each comparison source is examined
 * when looking for the zero operand.
 *
 * \return true if all instruction uses satisfy the conditions above, false
 *         otherwise.
 */
static bool
all_uses_are_compare_with_zero(const nir_alu_instr *instr)
{
   nir_foreach_use(use, &instr->def) {
      nir_instr *const user = nir_src_parent_instr(use);
      if (user->type != nir_instr_type_alu)
         return false;

      nir_alu_instr *const cmp = nir_instr_as_alu(user);
      if (!is_two_src_comparison(cmp))
         return false;

      /* At least one of the two operands has to be a constant zero. */
      const bool has_zero_operand =
         is_zero(cmp, 0, 1, cmp->src[0].swizzle) ||
         is_zero(cmp, 1, 1, cmp->src[1].swizzle);

      if (!has_zero_operand || !all_uses_are_bcsel(cmp))
         return false;
   }

   return true;
}

/**
 * Rematerialize comparison instructions next to their out-of-block users.
 *
 * For every two-source comparison in \p impl that passes
 * all_uses_are_bcsel(), each use located in a different block than the
 * comparison receives a fresh clone of the comparison:
 *
 *  - for a use by an if-statement, the clone is appended to the block
 *    preceding the if and the if condition is pointed at it;
 *  - for a use by an instruction, the clone is inserted directly before that
 *    instruction and every source of the instruction that referenced the
 *    original comparison is pointed at the clone.
 *
 * \param shader  Shader used as the allocation context for the clones.
 * \param impl    Function implementation to process.
 *
 * \return the result of nir_progress() for the accumulated progress flag.
 */
static bool
nir_opt_rematerialize_compares_impl(nir_shader *shader, nir_function_impl *impl)
{
   bool made_progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_alu)
            continue;

         nir_alu_instr *const cmp = nir_instr_as_alu(instr);
         if (!is_two_src_comparison(cmp) || !all_uses_are_bcsel(cmp))
            continue;

         /* cmp is now known to be a comparison whose instruction uses are
          * all selects.  Uses by if-statements were not inspected by the
          * check above, so they are handled explicitly below.
          *
          * The _safe iterator is required because rewriting a source
          * unlinks it from cmp's use list.  A clone is created for each
          * out-of-block user that is visited.
          */
         nir_foreach_use_including_if_safe(use, &cmp->def) {
            if (nir_src_is_if(use)) {
               nir_if *const user_if = nir_src_parent_if(use);
               nir_block *const pred_block =
                  nir_cf_node_as_block(nir_cf_node_prev(&user_if->cf_node));

               /* Only rematerialize when the comparison does not already
                * live in the block right before the if.
                */
               if (pred_block != cmp->instr.block) {
                  nir_alu_instr *copy = nir_alu_instr_clone(shader, cmp);

                  nir_instr_insert_after_block(pred_block, &copy->instr);
                  nir_src_rewrite(&user_if->condition, &copy->def);
                  made_progress = true;
               }

               continue;
            }

            nir_instr *const user = nir_src_parent_instr(use);

            /* A user sharing the comparison's block keeps the original. */
            if (user->block == cmp->instr.block)
               continue;

            nir_alu_instr *copy = nir_alu_instr_clone(shader, cmp);
            nir_instr_insert_before(user, &copy->instr);

            /* Redirect every operand of the user that referenced cmp. */
            nir_alu_instr *const user_alu = nir_instr_as_alu(user);
            const unsigned num_srcs = nir_op_infos[user_alu->op].num_inputs;
            for (unsigned s = 0; s < num_srcs; s++) {
               if (user_alu->src[s].src.ssa != &cmp->def)
                  continue;

               nir_src_rewrite(&user_alu->src[s].src, &copy->def);
               made_progress = true;
            }
         }
      }
   }

   return nir_progress(made_progress, impl, nir_metadata_control_flow);
}

static bool
nir_opt_rematerialize_alu_impl(nir_shader *shader, nir_function_impl *impl)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_alu)
            continue;

         nir_alu_instr *const alu = nir_instr_as_alu(instr);

         /* This list only include ALU ops that are likely to be able to have
          * cmod propagation on Intel GPUs.
          */
         switch (alu->op) {
         case nir_op_ineg:
         case nir_op_iabs:
         case nir_op_fneg:
         case nir_op_fabs:
         case nir_op_fadd:
         case nir_op_iadd:
         case nir_op_iadd_sat:
         case nir_op_uadd_sat:
         case nir_op_isub_sat:
         case nir_op_usub_sat:
         case nir_op_irhadd:
         case nir_op_urhadd:
         case nir_op_fmul:
         case nir_op_inot:
         case nir_op_iand:
         case nir_op_ior:
         case nir_op_ixor:
         case nir_op_ffloor:
         case nir_op_ffract:
         case nir_op_uclz:
         case nir_op_ishl:
         case nir_op_ishr:
         case nir_op_ushr:
         case nir_op_urol:
         case nir_op_uror:
            break; /* ... from switch. */
         default:
            continue; /* ... with loop. */
         }

         /* To help prevent increasing live ranges, require that one of the
          * sources be a constant.
          */
         if (nir_op_infos[alu->op].num_inputs == 2 &&
             !nir_src_is_const(alu->src[0].src) &&
             !nir_src_is_const(alu->src[1].src))
            continue;

         if (!all_uses_are_compare_with_zero(alu))
            continue;

         /* At this point it is known that the alu is only used by a
          * comparison with zero that is used by nir_op_bcsel and possibly by
          * if-statements (though the latter has not been explicitly checked).
          *
          * Iterate through each use of the ALU.  For every use that is in a
          * different block, emit a copy of the ALU.  Care must be taken here.
          * The original instruction must be duplicated only once in each
          * block because CSE cannot be run after this pass.
          */
         nir_foreach_use_safe(use, &alu->def) {
            nir_instr *const use_instr = nir_src_parent_instr(use);

            /* If the use is in the same block as the def, don't
             * rematerialize.
             */
            if (use_instr->block == alu->instr.block)
               continue;

            nir_alu_instr *clone = nir_alu_instr_clone(shader, alu);

            nir_instr_insert_before(use_instr, &clone->instr);

            nir_alu_instr *const use_alu = nir_instr_as_alu(use_instr);
            for (unsigned i = 0; i < nir_op_infos[use_alu->op].num_inputs; i++) {
               if (use_alu->src[i].src.ssa == &alu->def) {
                  nir_src_rewrite(&use_alu->src[i].src, &clone->def);
                  progress = true;
               }
            }
         }
      }
   }

   return nir_progress(progress, impl, nir_metadata_control_flow);
}

/**
 * Entry point of the pass.
 *
 * For every function implementation in \p shader, first run the comparison
 * rematerialization and then the ALU rematerialization.  Both sub-passes are
 * always executed, regardless of whether an earlier one made progress.
 *
 * \return true if any sub-pass reported progress on any implementation.
 */
bool
nir_opt_rematerialize_compares(nir_shader *shader)
{
   bool any_progress = false;

   nir_foreach_function_impl(impl, shader) {
      if (nir_opt_rematerialize_compares_impl(shader, impl))
         any_progress = true;

      if (nir_opt_rematerialize_alu_impl(shader, impl))
         any_progress = true;
   }

   return any_progress;
}