/*
 * Copyright © 2023 Collabora, Ltd.
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/u_math.h"
#include "nir.h"
#include "nir_builder.h"

/**
 * \file nir_lower_subgroups.c
 */
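
/* Emit a single-component 32-bit copy of a 64-bit subgroup intrinsic that
 * operates on either the low (component == 0) or high (component == 1) half
 * of the original 64-bit source.
 */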
static nir_intrinsic_instr *
lower_subgroups_64bit_split_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                                      unsigned int component)
{
   nir_def *comp;
   if (component == 0)
      comp = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
   else
      comp = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);

   nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
   nir_def_init(&intr->instr, &intr->def, 1, 32);
   intr->const_index[0] = intrin->const_index[0];
   intr->const_index[1] = intrin->const_index[1];
   intr->src[0] = nir_src_for_ssa(comp);
   if (nir_intrinsic_infos[intrin->intrinsic].num_srcs == 2)
      intr->src[1] = nir_src_for_ssa(intrin->src[1].ssa);

   intr->num_components = 1;
   nir_builder_instr_insert(b, &intr->instr);
   return intr;
}

static nir_def *
lower_subgroup_op_to_32bit(nir_builder *b, nir_intrinsic_instr *intrin)
{
   assert(intrin->src[0].ssa->bit_size == 64);
   nir_intrinsic_instr *intr_x = lower_subgroups_64bit_split_intrinsic(b, intrin, 0);
   nir_intrinsic_instr *intr_y = lower_subgroups_64bit_split_intrinsic(b, intrin, 1);
   return nir_pack_64_2x32_split(b, &intr_x->def, &intr_y->def);
}

/* Return a mask which is 1 for threads up to the run-time subgroup size, i.e.
 * 1 for the entire subgroup. SPIR-V requires us to return 0 for indices at or
 * above the subgroup size for the masks, but gt_mask and ge_mask make them 1
 * so we have to "and" with this mask.
 */
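/* For example, with ballot_bit_size == 32 and a run-time subgroup size of 8,
 * the single-component result below is ~0u >> (32 - 8) == 0x000000ff.
 */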
static nir_def *
build_subgroup_mask(nir_builder *b,
                    const nir_lower_subgroups_options *options)
{
   nir_def *subgroup_size = nir_load_subgroup_size(b);

   /* First compute the result assuming one ballot component. */
   nir_def *result =
      nir_ushr(b, nir_imm_intN_t(b, ~0ull, options->ballot_bit_size),
               nir_isub_imm(b, options->ballot_bit_size,
                            subgroup_size));

   /* Since the subgroup size and ballot bitsize are both powers of two, there
    * are two possible cases to consider:
    *
    * (1) The subgroup size is less than the ballot bitsize. We need to return
    * "result" in the first component and 0 in every other component.
    * (2) The subgroup size is a multiple of the ballot bitsize. We need to
    * return ~0 if the index in the vector is less than the subgroup size
    * divided by the ballot bitsize and 0 otherwise. For example, with a
    * target ballot type of 4 x uint32 and subgroup_size = 64 we'd need to
    * return { ~0, ~0, 0, 0 }.
    *
    * In case (2) it turns out that "result" will be ~0, because
    * "ballot_bit_size - subgroup_size" is also a multiple of
    * "ballot_bit_size" and since nir_ushr masks the shift value it will be
    * shifted by 0. This means that the first component can just be "result"
    * in all cases. The other components will also get the correct value in
    * case (1) if we just use the rule in case (2), so we'll get the correct
    * result if we just follow (2) and then replace the first component with
    * "result".
    */
   nir_const_value min_idx[4];
   for (unsigned i = 0; i < options->ballot_components; i++)
      min_idx[i] = nir_const_value_for_int(i * options->ballot_bit_size, 32);
   nir_def *min_idx_val = nir_build_imm(b, options->ballot_components, 32, min_idx);

   nir_def *result_extended =
      nir_pad_vector_imm_int(b, result, ~0ull, options->ballot_components);

   return nir_bcsel(b, nir_ult(b, min_idx_val, subgroup_size),
                    result_extended, nir_imm_intN_t(b, 0, options->ballot_bit_size));
}

/* Return a ballot-mask-sized value which represents "val" sign-extended and
 * then shifted left by "shift". Only particular values for "val" are
 * supported, see below.
 *
 * This function assumes that `val << shift` will never span a ballot_bit_size
 * word and that the high bit of val can be extended across the entire result.
 * This is trivially satisfied for 0, 1, ~0, and ~1. However, it may also be
 * fine for other values if the shift is guaranteed to be sufficiently
 * aligned. One example is 0xf when the shift is known to be a multiple of 4.
 */
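/* This is how the eq/ge/gt subgroup masks are built below: val == 1 yields
 * the eq_mask, ~0ull the ge_mask, and ~1ull the gt_mask, each shifted to the
 * invocation's own position.
 */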
static nir_def *
build_ballot_imm_ishl(nir_builder *b, int64_t val, nir_def *shift,
                      const nir_lower_subgroups_options *options)
{
   /* First compute the result assuming one ballot component. */
   nir_def *result =
      nir_ishl(b, nir_imm_intN_t(b, val, options->ballot_bit_size), shift);

   if (options->ballot_components == 1)
      return result;

   /* Fix up the result when there is > 1 component. The idea is that nir_ishl
    * masks out the high bits of the shift value already, so in case there's
    * more than one component the component which 1 would be shifted into
    * already has the right value and all we have to do is fixup the other
    * components. Components below it should always be 0, and components above
    * it must be either 0 or ~0 because of the assumptions above. For example,
    * if the target ballot size is 2 x uint32, and we're shifting 1 by 33, then
    * we'll feed 33 into ishl, which will mask it off to get 1, so we'll
    * compute a single-component result of 2, which is correct for the second
    * component, but the first component needs to be 0, which we get by
    * comparing the high bits of the shift with 0 and selecting the original
    * answer or 0 for the first component (and something similar with the
    * second component). This idea is generalized here for any component count.
    */
   nir_const_value min_shift[4];
   for (unsigned i = 0; i < options->ballot_components; i++)
      min_shift[i] = nir_const_value_for_int(i * options->ballot_bit_size, 32);
   nir_def *min_shift_val = nir_build_imm(b, options->ballot_components, 32, min_shift);

   nir_const_value max_shift[4];
   for (unsigned i = 0; i < options->ballot_components; i++)
      max_shift[i] = nir_const_value_for_int((i + 1) * options->ballot_bit_size, 32);
   nir_def *max_shift_val = nir_build_imm(b, options->ballot_components, 32, max_shift);

   return nir_bcsel(b, nir_ult(b, shift, max_shift_val),
                    nir_bcsel(b, nir_ult(b, shift, min_shift_val),
                              nir_imm_intN_t(b, val >> 63, result->bit_size),
                              result),
                    nir_imm_intN_t(b, 0, result->bit_size));
}

static nir_def *
ballot_type_to_uint(nir_builder *b, nir_def *value,
                    const nir_lower_subgroups_options *options)
{
   /* Allow internally generated ballots to pass through */
   if (value->num_components == options->ballot_components &&
       value->bit_size == options->ballot_bit_size)
      return value;

   /* Only the new-style SPIR-V subgroup instructions take a ballot result as
    * an argument, so we only use this on uvec4 types.
    */
   assert(value->num_components == 4 && value->bit_size == 32);

   return nir_extract_bits(b, &value, 1, 0, options->ballot_components,
                           options->ballot_bit_size);
}

static nir_def *
uint_to_ballot_type(nir_builder *b, nir_def *value,
                    unsigned num_components, unsigned bit_size)
{
   assert(util_is_power_of_two_nonzero(num_components));
   assert(util_is_power_of_two_nonzero(value->num_components));

   unsigned total_bits = bit_size * num_components;

   /* If the source doesn't have enough bits, zero-pad */
   if (total_bits > value->bit_size * value->num_components)
      value = nir_pad_vector_imm_int(b, value, 0, total_bits / value->bit_size);

   value = nir_bitcast_vector(b, value, bit_size);

   /* If the source has too many components, truncate. This can happen if,
    * for instance, we're implementing GL_ARB_shader_ballot or
    * VK_EXT_shader_subgroup_ballot which have 64-bit ballot values on an
    * architecture with a native 128-bit uvec4 ballot. This comes up in Zink
    * for OpenGL on Vulkan. It's the job of the driver calling this lowering
    * pass to ensure that it has restricted subgroup sizes sufficiently that
    * we have enough ballot bits.
    */
   if (value->num_components > num_components)
      value = nir_trim_vector(b, value, num_components);

   return value;
}

static nir_def *
lower_subgroup_op_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin)
{
   /* This is safe to call on scalar things but it would be silly */
   assert(intrin->def.num_components > 1);

   nir_def *value = intrin->src[0].ssa;
   nir_def *reads[NIR_MAX_VEC_COMPONENTS];

   for (unsigned i = 0; i < intrin->num_components; i++) {
      nir_intrinsic_instr *chan_intrin =
         nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
      nir_def_init(&chan_intrin->instr, &chan_intrin->def, 1,
                   intrin->def.bit_size);
      chan_intrin->num_components = 1;

      /* value */
      chan_intrin->src[0] = nir_src_for_ssa(nir_channel(b, value, i));
      /* invocation */
      if (nir_intrinsic_infos[intrin->intrinsic].num_srcs > 1) {
         assert(nir_intrinsic_infos[intrin->intrinsic].num_srcs == 2);
         chan_intrin->src[1] = nir_src_for_ssa(intrin->src[1].ssa);
      }

      chan_intrin->const_index[0] = intrin->const_index[0];
      chan_intrin->const_index[1] = intrin->const_index[1];

      nir_builder_instr_insert(b, &chan_intrin->instr);
      reads[i] = &chan_intrin->def;
   }

   return nir_vec(b, reads, intrin->num_components);
}

static nir_def *
lower_vote_eq_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin)
{
   nir_def *value = intrin->src[0].ssa;

   nir_def *result = NULL;
   for (unsigned i = 0; i < intrin->num_components; i++) {
      nir_def *chan = nir_channel(b, value, i);

      if (intrin->intrinsic == nir_intrinsic_vote_feq) {
         chan = nir_vote_feq(b, intrin->def.bit_size, chan);
      } else {
         chan = nir_vote_ieq(b, intrin->def.bit_size, chan);
      }

      if (result) {
         result = nir_iand(b, result, chan);
      } else {
         result = chan;
      }
   }

   return result;
}
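
/* vote_{f,i}eq is true iff every invocation's value matches the first active
 * invocation's value, so read that value with read_first_invocation, compare,
 * and vote_all on the comparison result.
 */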
static nir_def *
lower_vote_eq(nir_builder *b, nir_intrinsic_instr *intrin)
{
   nir_def *value = intrin->src[0].ssa;

   /* We have to implicitly lower to scalar */
   nir_def *all_eq = NULL;
   for (unsigned i = 0; i < intrin->num_components; i++) {
      nir_def *rfi = nir_read_first_invocation(b, nir_channel(b, value, i));

      nir_def *is_eq;
      if (intrin->intrinsic == nir_intrinsic_vote_feq) {
         is_eq = nir_feq(b, rfi, nir_channel(b, value, i));
      } else {
         is_eq = nir_ieq(b, rfi, nir_channel(b, value, i));
      }

      if (all_eq == NULL) {
         all_eq = is_eq;
      } else {
         all_eq = nir_iand(b, all_eq, is_eq);
      }
   }

   return nir_vote_all(b, 1, all_eq);
}

static nir_def *
lower_shuffle_to_swizzle(nir_builder *b, nir_intrinsic_instr *intrin)
{
   unsigned mask = nir_src_as_uint(intrin->src[1]);

   if (mask >= 32)
      return NULL;

   return nir_masked_swizzle_amd(b, intrin->src[0].ssa,
                                 .swizzle_mask = (mask << 10) | 0x1f,
                                 .fetch_inactive = true);
}

/* Lowers "specialized" shuffles to a generic nir_intrinsic_shuffle. */

static nir_def *
lower_to_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
                 const nir_lower_subgroups_options *options)
{
   if (intrin->intrinsic == nir_intrinsic_shuffle_xor &&
       options->lower_shuffle_to_swizzle_amd &&
       nir_src_is_const(intrin->src[1])) {

      nir_def *result = lower_shuffle_to_swizzle(b, intrin);
      if (result)
         return result;
   }

   nir_def *index = nir_load_subgroup_invocation(b);
   switch (intrin->intrinsic) {
   case nir_intrinsic_shuffle_xor:
      index = nir_ixor(b, index, intrin->src[1].ssa);
      break;
   case nir_intrinsic_shuffle_up:
      index = nir_isub(b, index, intrin->src[1].ssa);
      break;
   case nir_intrinsic_shuffle_down:
      index = nir_iadd(b, index, intrin->src[1].ssa);
      break;
   case nir_intrinsic_quad_broadcast:
      index = nir_ior(b, nir_iand_imm(b, index, ~0x3),
                      intrin->src[1].ssa);
      break;
   case nir_intrinsic_quad_swap_horizontal:
      /* For Quad operations, subgroups are divided into quads where
       * (invocation % 4) is the index to a square arranged as follows:
       *
       *    +---+---+
       *    | 0 | 1 |
       *    +---+---+
       *    | 2 | 3 |
       *    +---+---+
       */
      index = nir_ixor(b, index, nir_imm_int(b, 0x1));
      break;
   case nir_intrinsic_quad_swap_vertical:
      index = nir_ixor(b, index, nir_imm_int(b, 0x2));
      break;
   case nir_intrinsic_quad_swap_diagonal:
      index = nir_ixor(b, index, nir_imm_int(b, 0x3));
      break;
   case nir_intrinsic_rotate: {
      nir_def *delta = intrin->src[1].ssa;
      nir_def *local_id = nir_load_subgroup_invocation(b);
      const unsigned cluster_size = nir_intrinsic_cluster_size(intrin);

      nir_def *rotation_group_mask =
         cluster_size > 0 ? nir_imm_int(b, (int)(cluster_size - 1)) : nir_iadd_imm(b, nir_load_subgroup_size(b), -1);

      index = nir_iand(b, nir_iadd(b, local_id, delta),
                       rotation_group_mask);
      if (cluster_size > 0) {
         index = nir_iadd(b, index,
                          nir_iand(b, local_id, nir_inot(b, rotation_group_mask)));
      }
      break;
   }
   default:
      unreachable("Invalid intrinsic");
   }

   return nir_shuffle(b, intrin->src[0].ssa, index);
}

static const struct glsl_type *
glsl_type_for_ssa(nir_def *def)
{
   const struct glsl_type *comp_type = def->bit_size == 1 ? glsl_bool_type() : glsl_uintN_t_type(def->bit_size);
   return glsl_replace_vector_type(comp_type, def->num_components);
}

/* Lower nir_intrinsic_shuffle to a waterfall loop + nir_read_invocation.
 */
static nir_def *
lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin)
{
   nir_def *val = intrin->src[0].ssa;
   nir_def *id = intrin->src[1].ssa;

   /* The loop is something like:
    *
    * while (true) {
    *    first_id = readFirstInvocation(gl_SubgroupInvocationID);
    *    first_val = readFirstInvocation(val);
    *    first_result = readInvocation(val, readFirstInvocation(id));
    *    if (id == first_id)
    *       result = first_val;
    *    if (elect()) {
    *       if (id > gl_SubgroupInvocationID) {
    *          result = first_result;
    *       }
    *       break;
    *    }
    * }
    *
    * The idea is to guarantee, on each iteration of the loop, that anything
    * reading from first_id gets the correct value, so that we can then kill
    * it off by breaking out of the loop. Before doing that we also have to
    * ensure that the first_id invocation gets the correct value. It won't
    * already have the correct value only if the invocation it's reading from
    * hasn't already been killed off, that is, if that invocation is later
    * than its own ID. Invocations where id <= gl_SubgroupInvocationID will be
    * assigned their result in the first if, and invocations where id >
    * gl_SubgroupInvocationID will be assigned their result in the second if.
    *
    * We do this more complicated loop rather than looping over all id's
    * explicitly because at this point we don't know the "actual" subgroup
    * size and at the moment there's no way to get at it, which means we may
    * loop over always-inactive invocations.
    */

   nir_def *subgroup_id = nir_load_subgroup_invocation(b);

   nir_variable *result =
      nir_local_variable_create(b->impl, glsl_type_for_ssa(val), "result");

   nir_loop *loop = nir_push_loop(b);
   {
      nir_def *first_id = nir_read_first_invocation(b, subgroup_id);
      nir_def *first_val = nir_read_first_invocation(b, val);
      nir_def *first_result =
         nir_read_invocation(b, val, nir_read_first_invocation(b, id));

      nir_if *nif = nir_push_if(b, nir_ieq(b, id, first_id));
      {
         nir_store_var(b, result, first_val, BITFIELD_MASK(val->num_components));
      }
      nir_pop_if(b, nif);

      nir_if *nif2 = nir_push_if(b, nir_elect(b, 1));
      {
         nir_if *nif3 = nir_push_if(b, nir_ult(b, subgroup_id, id));
         {
            nir_store_var(b, result, first_result, BITFIELD_MASK(val->num_components));
         }
         nir_pop_if(b, nif3);

         nir_jump(b, nir_jump_break);
      }
      nir_pop_if(b, nif2);
   }
   nir_pop_loop(b, loop);

   return nir_load_var(b, result);
}

static nir_def *
lower_boolean_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
                      const nir_lower_subgroups_options *options)
{
   assert(options->ballot_components == 1 && options->subgroup_size);
   nir_def *ballot = nir_ballot_relaxed(b, 1, options->ballot_bit_size, intrin->src[0].ssa);

   nir_def *index = NULL;

   /* If the shuffle amount isn't constant, it might be divergent but
    * inverse_ballot requires a uniform source, so take a different path.
    * rotate allows us to assume the delta is uniform, unlike shuffle_up/down.
    */
   switch (intrin->intrinsic) {
   case nir_intrinsic_shuffle_up:
      if (nir_src_is_const(intrin->src[1]))
         ballot = nir_ishl(b, ballot, intrin->src[1].ssa);
      else
         index = nir_isub(b, nir_load_subgroup_invocation(b), intrin->src[1].ssa);
      break;
   case nir_intrinsic_shuffle_down:
      if (nir_src_is_const(intrin->src[1]))
         ballot = nir_ushr(b, ballot, intrin->src[1].ssa);
      else
         index = nir_iadd(b, nir_load_subgroup_invocation(b), intrin->src[1].ssa);
      break;
   case nir_intrinsic_shuffle_xor:
      index = nir_ixor(b, nir_load_subgroup_invocation(b), intrin->src[1].ssa);
      break;
   case nir_intrinsic_rotate: {
      nir_def *delta = nir_as_uniform(b, intrin->src[1].ssa);
      uint32_t cluster_size = nir_intrinsic_cluster_size(intrin);
      cluster_size = cluster_size ? cluster_size : options->subgroup_size;
      cluster_size = MIN2(cluster_size, options->subgroup_size);
      if (cluster_size == 1) {
         return intrin->src[0].ssa;
      } else if (cluster_size == 2) {
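         /* A rotate by an odd delta within clusters of 2 swaps adjacent bit
          * pairs: "lo" moves the odd bits down into the even positions and
          * "hi" moves the even bits up into the odd positions.
          */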
         delta = nir_iand_imm(b, delta, cluster_size - 1);
         nir_def *lo = nir_iand_imm(b, nir_ushr_imm(b, ballot, 1), 0x5555555555555555ull);
         nir_def *hi = nir_iand_imm(b, nir_ishl_imm(b, ballot, 1), 0xaaaaaaaaaaaaaaaaull);
         ballot = nir_bcsel(b, nir_ine_imm(b, delta, 0), nir_ior(b, hi, lo), ballot);
      } else if (cluster_size == ballot->bit_size) {
         ballot = nir_uror(b, ballot, delta);
      } else if (cluster_size == 32) {
         nir_def *unpacked = nir_unpack_64_2x32(b, ballot);
         unpacked = nir_uror(b, unpacked, delta);
         ballot = nir_pack_64_2x32(b, unpacked);
      } else {
         delta = nir_iand_imm(b, delta, cluster_size - 1);
         nir_def *delta_rev = nir_isub_imm(b, cluster_size, delta);
         nir_def *mask = nir_mask(b, delta_rev, ballot->bit_size);
         for (uint32_t i = cluster_size; i < ballot->bit_size; i *= 2) {
            mask = nir_ior(b, nir_ishl_imm(b, mask, i), mask);
         }
         nir_def *lo = nir_iand(b, nir_ushr(b, ballot, delta), mask);
         nir_def *hi = nir_iand(b, nir_ishl(b, ballot, delta_rev), nir_inot(b, mask));
         ballot = nir_ior(b, lo, hi);
      }
      break;
   }
   case nir_intrinsic_shuffle:
      index = intrin->src[1].ssa;
      break;
   case nir_intrinsic_read_invocation:
      index = nir_as_uniform(b, intrin->src[1].ssa);
      break;
   default:
      unreachable("not a boolean shuffle");
   }

   if (index) {
      nir_def *mask = nir_ishl(b, nir_imm_intN_t(b, 1, ballot->bit_size), index);
      return nir_ine_imm(b, nir_iand(b, ballot, mask), 0);
   } else {
      return nir_inverse_ballot(b, 1, ballot);
   }
}
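
/* Horizontal bit count: add up the per-component popcounts of a multi-word
 * ballot vector.
 */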
static nir_def *
vec_bit_count(nir_builder *b, nir_def *value)
{
   nir_def *vec_result = nir_bit_count(b, value);
   nir_def *result = nir_channel(b, vec_result, 0);
   for (unsigned i = 1; i < value->num_components; i++)
      result = nir_iadd(b, result, nir_channel(b, vec_result, i));
   return result;
}

/* produce a bitmask of 111...000...111... alternating between "size"
 * 1's and "size" 0's (the LSB is 1).
 */
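/* e.g. reduce_mask(2, 32) == 0x33333333 and reduce_mask(8, 32) == 0x00ff00ff. */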
static uint64_t
reduce_mask(unsigned size, unsigned ballot_bit_size)
{
   uint64_t mask = 0;
   for (unsigned i = 0; i < ballot_bit_size; i += 2 * size) {
      mask |= ((1ull << size) - 1) << i;
   }

   return mask;
}

/* operate on a uniform per-thread bitmask provided by ballot() to perform the
 * desired Boolean reduction. Assumes that the identity of the operation is
 * false (so, no iand).
 */
static nir_def *
lower_boolean_reduce_internal(nir_builder *b, nir_def *src,
                              unsigned cluster_size, nir_op op,
                              const nir_lower_subgroups_options *options)
{
   for (unsigned size = 1; size < cluster_size; size *= 2) {
      nir_def *shifted = nir_ushr_imm(b, src, size);
      src = nir_build_alu2(b, op, shifted, src);
      uint64_t mask = reduce_mask(size, options->ballot_bit_size);
      src = nir_iand_imm(b, src, mask);
      shifted = nir_ishl_imm(b, src, size);
      src = nir_ior(b, src, shifted);
   }

   return src;
}

/* operate on a uniform per-thread bitmask provided by ballot() to perform the
 * desired Boolean inclusive scan. Assumes that the identity of the operation
 * is false (so, no iand).
 */
static nir_def *
lower_boolean_scan_internal(nir_builder *b, nir_def *src,
                            nir_op op,
                            const nir_lower_subgroups_options *options)
{
   if (op == nir_op_ior) {
      /* We want to return a bitmask with all 1's starting at the first 1 in
       * src. -src is equivalent to ~src + 1. While src | ~src returns all
       * 1's, src | (~src + 1) returns all 1's except for the bits changed by
       * the increment. Any 1's before the least significant 0 of ~src are
       * turned into 0 (zeroing those bits after or'ing) and the least
       * significant 0 of ~src is turned into 1 (not doing anything). So the
       * final output is what we want.
       */
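      /* e.g. src = 0b00101000: -src = ...11011000, so src | -src = ...11111000. */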
      return nir_ior(b, src, nir_ineg(b, src));
   } else {
      assert(op == nir_op_ixor);
      for (unsigned shift = 1; shift < options->ballot_bit_size; shift *= 2) {
         src = nir_ixor(b, src, nir_ishl_imm(b, src, shift));
      }
      return src;
   }
}

static nir_def *
lower_boolean_reduce(nir_builder *b, nir_intrinsic_instr *intrin,
                     const nir_lower_subgroups_options *options)
{
   assert(intrin->num_components == 1);
   assert(options->ballot_components == 1);

   unsigned cluster_size =
      intrin->intrinsic == nir_intrinsic_reduce ? nir_intrinsic_cluster_size(intrin) : 0;
   nir_op op = nir_intrinsic_reduction_op(intrin);

   /* For certain cluster sizes, reductions of iand and ior can be implemented
    * more efficiently.
    */
   if (intrin->intrinsic == nir_intrinsic_reduce) {
      if (cluster_size == 0) {
         if (op == nir_op_iand)
            return nir_vote_all(b, 1, intrin->src[0].ssa);
         else if (op == nir_op_ior)
            return nir_vote_any(b, 1, intrin->src[0].ssa);
         else if (op == nir_op_ixor)
            return nir_i2b(b, nir_iand_imm(b, vec_bit_count(b, nir_ballot(b,
                                                                          options->ballot_components,
                                                                          options->ballot_bit_size,
                                                                          intrin->src[0].ssa)),
                                           1));
         else
            unreachable("bad boolean reduction op");
      }

      if (cluster_size == 4) {
         if (op == nir_op_iand)
            return nir_quad_vote_all(b, 1, intrin->src[0].ssa);
         else if (op == nir_op_ior)
            return nir_quad_vote_any(b, 1, intrin->src[0].ssa);
      }
   }

   nir_def *src = intrin->src[0].ssa;

   /* Apply DeMorgan's law to implement "and" reductions, since all the
    * lower_boolean_*_internal() functions assume an identity of 0 to make the
    * generated code shorter.
    */
   nir_op new_op = (op == nir_op_iand) ? nir_op_ior : op;
   if (op == nir_op_iand) {
      src = nir_inot(b, src);
   }

   nir_def *val = nir_ballot(b, options->ballot_components, options->ballot_bit_size, src);

   switch (intrin->intrinsic) {
   case nir_intrinsic_reduce:
      val = lower_boolean_reduce_internal(b, val, cluster_size, new_op, options);
      break;
   case nir_intrinsic_inclusive_scan:
      val = lower_boolean_scan_internal(b, val, new_op, options);
      break;
   case nir_intrinsic_exclusive_scan:
      val = lower_boolean_scan_internal(b, val, new_op, options);
      val = nir_ishl_imm(b, val, 1);
      break;
   default:
      unreachable("bad intrinsic");
   }

   if (op == nir_op_iand) {
      val = nir_inot(b, val);
   }

   return nir_inverse_ballot(b, 1, val);
}

static nir_def *
build_identity(nir_builder *b, unsigned bit_size, nir_op op)
{
   nir_const_value ident_const = nir_alu_binop_identity(op, bit_size);
   return nir_build_imm(b, 1, bit_size, &ident_const);
}

/* Implementation of scan/reduce that assumes a full subgroup */
static nir_def *
build_scan_full(nir_builder *b, nir_intrinsic_op op, nir_op red_op,
                nir_def *data, unsigned cluster_size)
{
   switch (op) {
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_inclusive_scan: {
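      /* Hillis-Steele scan: after the step with offset i, each channel holds
       * the reduction of up to 2*i consecutive channels ending at itself.
       */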
      for (unsigned i = 1; i < cluster_size; i *= 2) {
         nir_def *idx = nir_load_subgroup_invocation(b);
         nir_def *has_buddy = nir_ige_imm(b, idx, i);

         nir_def *buddy_data = nir_shuffle_up(b, data, nir_imm_int(b, i));
         nir_def *accum = nir_build_alu2(b, red_op, data, buddy_data);
         data = nir_bcsel(b, has_buddy, accum, data);
      }

      if (op == nir_intrinsic_exclusive_scan) {
         /* For exclusive scans, we need to shift one more time and fill in the
          * bottom channel with identity.
          */
         nir_def *idx = nir_load_subgroup_invocation(b);
         nir_def *has_buddy = nir_ige_imm(b, idx, 1);

         nir_def *buddy_data = nir_shuffle_up(b, data, nir_imm_int(b, 1));
         nir_def *identity = build_identity(b, data->bit_size, red_op);
         data = nir_bcsel(b, has_buddy, buddy_data, identity);
      }

      return data;
   }

   case nir_intrinsic_reduce: {
      for (unsigned i = 1; i < cluster_size; i *= 2) {
         nir_def *buddy_data = nir_shuffle_xor(b, data, nir_imm_int(b, i));
         data = nir_build_alu2(b, red_op, data, buddy_data);
      }
      return data;
   }

   default:
      unreachable("Unsupported scan/reduce op");
   }
}

/* Fully generic implementation of scan/reduce that takes a mask */
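/* This is pointer-jumping: after k loop iterations each channel has folded in
 * at least min(2^k, available) of its nearest lower active channels, so
 * log2(max_mask_bits) iterations always suffice.
 */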
static nir_def *
build_scan_reduce(nir_builder *b, nir_intrinsic_op op, nir_op red_op,
                  nir_def *data, nir_def *mask, unsigned max_mask_bits,
                  const nir_lower_subgroups_options *options)
{
   nir_def *lt_mask = nir_load_subgroup_lt_mask(b, options->ballot_components,
                                                options->ballot_bit_size);

   /* Mask of all channels whose values we need to accumulate. Our own value
    * is already in accum, if inclusive, thanks to the initialization above.
    * We only need to consider lower indexed invocations.
    */
   nir_def *remaining = nir_iand(b, mask, lt_mask);

   for (unsigned i = 1; i < max_mask_bits; i *= 2) {
      /* At each step, our buddy channel is the first channel we have yet to
       * take into account in the accumulator.
       */
      nir_def *has_buddy = nir_bany_inequal(b, remaining, nir_imm_int(b, 0));
      nir_def *buddy = nir_ballot_find_msb(b, 32, remaining);

      /* Accumulate with our buddy channel, if any */
      nir_def *buddy_data = nir_shuffle(b, data, buddy);
      nir_def *accum = nir_build_alu2(b, red_op, data, buddy_data);
      data = nir_bcsel(b, has_buddy, accum, data);

      /* We just took into account everything in our buddy's accumulator from
       * the previous step. The only things remaining are whatever channels
       * were remaining for our buddy.
       */
      nir_def *buddy_remaining = nir_shuffle(b, remaining, buddy);
      remaining = nir_bcsel(b, has_buddy, buddy_remaining, nir_imm_int(b, 0));
   }

   switch (op) {
   case nir_intrinsic_exclusive_scan: {
      /* For exclusive scans, we need to shift one more time and fill in the
       * bottom channel with identity.
       *
       * Some of this will get CSE'd with the first step but that's okay. The
       * code is cleaner this way.
       */
      nir_def *lower = nir_iand(b, mask, lt_mask);
      nir_def *has_buddy = nir_bany_inequal(b, lower, nir_imm_int(b, 0));
      nir_def *buddy = nir_ballot_find_msb(b, 32, lower);

      nir_def *buddy_data = nir_shuffle(b, data, buddy);
      nir_def *identity = build_identity(b, data->bit_size, red_op);
      return nir_bcsel(b, has_buddy, buddy_data, identity);
   }

   case nir_intrinsic_inclusive_scan:
      return data;

   case nir_intrinsic_reduce: {
      /* For reductions, we need to take the top value of the scan */
      nir_def *idx = nir_ballot_find_msb(b, 32, mask);
      return nir_shuffle(b, data, idx);
   }

   default:
      unreachable("Unsupported scan/reduce op");
   }
}
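
/* Build a ballot mask covering the invocation's own cluster: e.g. with
 * cluster_size == 4, invocation 6 belongs to the cluster starting at 4 and
 * gets 0xf << 4 == 0xf0.
 */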
static nir_def *
build_cluster_mask(nir_builder *b, unsigned cluster_size,
                   const nir_lower_subgroups_options *options)
{
   nir_def *idx = nir_load_subgroup_invocation(b);
   nir_def *cluster = nir_iand_imm(b, idx, ~(uint64_t)(cluster_size - 1));

   if (cluster_size <= options->ballot_bit_size) {
      return build_ballot_imm_ishl(b, BITFIELD_MASK(cluster_size), cluster,
                                   options);
   }

   /* Since the cluster size and the ballot bit size are both powers of 2,
    * cluster size will be a multiple of the ballot bit size. Therefore, each
    * ballot component will be either all ones or all zeros. Build a vec for
    * which each component holds the value of `cluster` for which the mask
    * should be all ones.
    */
   nir_const_value cluster_sel_const[4];
   assert(ARRAY_SIZE(cluster_sel_const) >= options->ballot_components);

   for (unsigned i = 0; i < options->ballot_components; i++) {
      unsigned cluster_val =
         ROUND_DOWN_TO(i * options->ballot_bit_size, cluster_size);
      cluster_sel_const[i] =
         nir_const_value_for_uint(cluster_val, options->ballot_bit_size);
   }

   nir_def *cluster_sel =
      nir_build_imm(b, options->ballot_components, options->ballot_bit_size,
                    cluster_sel_const);
   nir_def *ones = nir_imm_intN_t(b, -1, options->ballot_bit_size);
   nir_def *zeros = nir_imm_intN_t(b, 0, options->ballot_bit_size);
   return nir_bcsel(b, nir_ieq(b, cluster, cluster_sel), ones, zeros);
}

static nir_def *
lower_scan_reduce(nir_builder *b, nir_intrinsic_instr *intrin,
                  const nir_lower_subgroups_options *options)
{
   const nir_op red_op = nir_intrinsic_reduction_op(intrin);
   unsigned subgroup_size = options->subgroup_size;

   /* Grab the cluster size */
   unsigned cluster_size = subgroup_size;
   if (nir_intrinsic_has_cluster_size(intrin)) {
      cluster_size = nir_intrinsic_cluster_size(intrin);
      if (cluster_size == 0 || cluster_size > subgroup_size)
         cluster_size = subgroup_size;
   }

   /* Check if all invocations are active. If so, we use the fast path. */
   nir_def *mask = nir_ballot(b, options->ballot_components,
                              options->ballot_bit_size, nir_imm_true(b));

   nir_def *full, *partial;
   nir_push_if(b, nir_ball_iequal(b, mask, build_subgroup_mask(b, options)));
   {
      full = build_scan_full(b, intrin->intrinsic, red_op,
                             intrin->src[0].ssa, cluster_size);
   }
   nir_push_else(b, NULL);
   {
      /* Mask according to the cluster size */
      if (cluster_size < subgroup_size) {
         nir_def *cluster_mask = build_cluster_mask(b, cluster_size, options);
         mask = nir_iand(b, mask, cluster_mask);
      }

      partial = build_scan_reduce(b, intrin->intrinsic, red_op,
                                  intrin->src[0].ssa, mask, cluster_size,
                                  options);
   }
   nir_pop_if(b, NULL);
   return nir_if_phi(b, full, partial);
}

static bool
lower_subgroups_filter(const nir_instr *instr, const void *_options)
{
   return instr->type == nir_instr_type_intrinsic;
}

static nir_def *
build_subgroup_eq_mask(nir_builder *b,
                       const nir_lower_subgroups_options *options)
{
   nir_def *subgroup_idx = nir_load_subgroup_invocation(b);

   return build_ballot_imm_ishl(b, 1, subgroup_idx, options);
}

static nir_def *
build_subgroup_ge_mask(nir_builder *b,
                       const nir_lower_subgroups_options *options)
{
   nir_def *subgroup_idx = nir_load_subgroup_invocation(b);

   return build_ballot_imm_ishl(b, ~0ull, subgroup_idx, options);
}

static nir_def *
build_subgroup_gt_mask(nir_builder *b,
                       const nir_lower_subgroups_options *options)
{
   nir_def *subgroup_idx = nir_load_subgroup_invocation(b);

   return build_ballot_imm_ishl(b, ~1ull, subgroup_idx, options);
}

static nir_def *
build_subgroup_quad_mask(nir_builder *b,
                         const nir_lower_subgroups_options *options)
{
   nir_def *subgroup_idx = nir_load_subgroup_invocation(b);
   nir_def *quad_first_idx = nir_iand_imm(b, subgroup_idx, ~0x3);

   return build_ballot_imm_ishl(b, 0xf, quad_first_idx, options);
}

static nir_def *
build_quad_vote_any(nir_builder *b, nir_def *src,
                    const nir_lower_subgroups_options *options)
{
   nir_def *ballot = nir_ballot(b, options->ballot_components,
                                options->ballot_bit_size,
                                src);
   nir_def *mask = build_subgroup_quad_mask(b, options);

   return nir_ine_imm(b, nir_iand(b, ballot, mask), 0);
}
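
/* Horizontal find_lsb across the components of a multi-word ballot vector:
 * returns the index of the lowest set bit overall, or -1 if none is set.
 */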
static nir_def *
vec_find_lsb(nir_builder *b, nir_def *value)
{
   nir_def *vec_result = nir_find_lsb(b, value);
   nir_def *result = nir_imm_int(b, -1);
   for (int i = value->num_components - 1; i >= 0; i--) {
      nir_def *channel = nir_channel(b, vec_result, i);
      /* result = channel >= 0 ? (i * bitsize + channel) : result */
      result = nir_bcsel(b, nir_ige_imm(b, channel, 0),
                         nir_iadd_imm(b, channel, i * value->bit_size),
                         result);
   }
   return result;
}

static nir_def *
vec_find_msb(nir_builder *b, nir_def *value)
{
   nir_def *vec_result = nir_ufind_msb(b, value);
   nir_def *result = nir_imm_int(b, -1);
   for (unsigned i = 0; i < value->num_components; i++) {
      nir_def *channel = nir_channel(b, vec_result, i);
      /* result = channel >= 0 ? (i * bitsize + channel) : result */
      result = nir_bcsel(b, nir_ige_imm(b, channel, 0),
                         nir_iadd_imm(b, channel, i * value->bit_size),
                         result);
   }
   return result;
}
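
/* Lower a quad_broadcast whose index is not compile-time constant by emitting
 * all four constant-index quad_broadcasts and selecting among them with a
 * chain of bcsels.
 */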
static nir_def *
lower_dynamic_quad_broadcast(nir_builder *b, nir_intrinsic_instr *intrin,
                             const nir_lower_subgroups_options *options)
{
   if (!options->lower_quad_broadcast_dynamic_to_const)
      return lower_to_shuffle(b, intrin, options);

   nir_def *dst = NULL;

   for (unsigned i = 0; i < 4; ++i) {
      nir_def *qbcst = nir_quad_broadcast(b, intrin->src[0].ssa,
                                          nir_imm_int(b, i));

      if (i)
         dst = nir_bcsel(b, nir_ieq_imm(b, intrin->src[1].ssa, i),
                         qbcst, dst);
      else
         dst = qbcst;
   }

   return dst;
}

static nir_def *
lower_first_invocation_to_ballot(nir_builder *b, nir_intrinsic_instr *intrin,
                                 const nir_lower_subgroups_options *options)
{
   return nir_ballot_find_lsb(b, 32, nir_ballot(b, 4, 32, nir_imm_true(b)));
}

static nir_def *
lower_read_first_invocation(nir_builder *b, nir_intrinsic_instr *intrin)
{
   return nir_read_invocation(b, intrin->src[0].ssa, nir_first_invocation(b));
}

static nir_def *
lower_read_invocation_to_cond(nir_builder *b, nir_intrinsic_instr *intrin)
{
   return nir_read_invocation_cond_ir3(b, intrin->def.bit_size,
                                       intrin->src[0].ssa,
                                       nir_ieq(b, intrin->src[1].ssa,
                                               nir_load_subgroup_invocation(b)));
}

static nir_def *
lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
{
   const nir_lower_subgroups_options *options = _options;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   switch (intrin->intrinsic) {
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
      if (options->lower_vote_trivial)
         return intrin->src[0].ssa;
      break;

   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
      if (options->lower_vote_trivial)
         return nir_imm_true(b);

      if (nir_src_bit_size(intrin->src[0]) == 1) {
         if (options->lower_vote_bool_eq)
            return lower_vote_eq(b, intrin);
      } else {
         if (options->lower_vote_eq)
            return lower_vote_eq(b, intrin);
      }

      if (options->lower_to_scalar && intrin->num_components > 1)
         return lower_vote_eq_to_scalar(b, intrin);
      break;

   case nir_intrinsic_load_subgroup_size:
      if (options->subgroup_size)
         return nir_imm_int(b, options->subgroup_size);
      break;

   case nir_intrinsic_first_invocation:
      if (options->subgroup_size == 1)
         return nir_imm_int(b, 0);

      if (options->lower_first_invocation_to_ballot)
         return lower_first_invocation_to_ballot(b, intrin, options);

      break;

   case nir_intrinsic_read_invocation:
      if (options->lower_to_scalar && intrin->num_components > 1)
         return lower_subgroup_op_to_scalar(b, intrin);

      if (options->lower_boolean_shuffle && intrin->src[0].ssa->bit_size == 1)
         return lower_boolean_shuffle(b, intrin, options);

      if (options->lower_read_invocation_to_cond)
         return lower_read_invocation_to_cond(b, intrin);

      break;

   case nir_intrinsic_read_first_invocation:
      if (options->lower_to_scalar && intrin->num_components > 1)
         return lower_subgroup_op_to_scalar(b, intrin);

      if (options->lower_read_first_invocation)
         return lower_read_first_invocation(b, intrin);
      break;

   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask: {
      if (!options->lower_subgroup_masks)
         return NULL;

      nir_def *val;
      switch (intrin->intrinsic) {
      case nir_intrinsic_load_subgroup_eq_mask:
         val = build_subgroup_eq_mask(b, options);
         break;
      case nir_intrinsic_load_subgroup_ge_mask:
         val = nir_iand(b, build_subgroup_ge_mask(b, options),
                        build_subgroup_mask(b, options));
         break;
      case nir_intrinsic_load_subgroup_gt_mask:
         val = nir_iand(b, build_subgroup_gt_mask(b, options),
                        build_subgroup_mask(b, options));
         break;
      case nir_intrinsic_load_subgroup_le_mask:
         val = nir_inot(b, build_subgroup_gt_mask(b, options));
         break;
      case nir_intrinsic_load_subgroup_lt_mask:
         val = nir_inot(b, build_subgroup_ge_mask(b, options));
         break;
      default:
         unreachable("you seriously can't tell this is unreachable?");
      }

      return uint_to_ballot_type(b, val,
                                 intrin->def.num_components,
                                 intrin->def.bit_size);
   }

   case nir_intrinsic_ballot: {
      if (intrin->def.num_components == options->ballot_components &&
          intrin->def.bit_size == options->ballot_bit_size)
         return NULL;

      nir_def *ballot =
         nir_ballot(b, options->ballot_components, options->ballot_bit_size,
                    intrin->src[0].ssa);

      return uint_to_ballot_type(b, ballot,
                                 intrin->def.num_components,
                                 intrin->def.bit_size);
   }
nir/spirv: Add inverse_ballot intrinsic
This is actually a no-op on AMD, so we really don't want to lower it to
something more complicated. There may be a more efficient way to do
this on Intel too. In addition, in the future we'll want to use this for
lowering boolean reduce operations, where the inverse ballot will
operate on the backend's "natural" ballot type as indicated by
options->ballot_bit_size, instead of uvec4 as produced by SPIR-V. In
total, there are now three possible lowerings we may have to perform:
- inverse_ballot with source type of uvec4 from SPIR-V to inverse_ballot
with natural source type, when the backend supports inverse_ballot
natively.
- inverse_ballot with source type of uvec4 from SPIR-V to arithmetic,
when the backend doesn't support inverse_ballot.
- inverse_ballot with natural source type from reduce operation, when
the backend doesn't support inverse_ballot.
Previously we just did the second lowering unconditionally in vtn, but
it's just a combination of the first and third. We add support here for
the first and third lowerings in nir_lower_subgroups, instead of simply
moving the second lowering, to avoid unnecessary churn.
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25123>
   case nir_intrinsic_inverse_ballot:
      if (options->lower_inverse_ballot) {
         return nir_ballot_bitfield_extract(b, 1, intrin->src[0].ssa,
                                            nir_load_subgroup_invocation(b));
      } else if (intrin->src[0].ssa->num_components != options->ballot_components ||
                 intrin->src[0].ssa->bit_size != options->ballot_bit_size) {
         return nir_inverse_ballot(b, 1, ballot_type_to_uint(b, intrin->src[0].ssa, options));
      }
      break;
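
   /* The arithmetic fallback above reads back each invocation's own bit:
    * inverse_ballot(bits) == ((bits >> gl_SubgroupInvocationID) & 1) != 0,
    * which is exactly what ballot_bitfield_extract computes below,
    * including the component select for multi-word ballots.
    */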

   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb: {
      nir_def *int_val = ballot_type_to_uint(b, intrin->src[0].ssa,
                                             options);

      if (intrin->intrinsic != nir_intrinsic_ballot_bitfield_extract &&
          intrin->intrinsic != nir_intrinsic_ballot_find_lsb) {
         /* For OpGroupNonUniformBallotFindMSB, the SPIR-V Spec says:
          *
          *    "Find the most significant bit set to 1 in Value, considering
          *    only the bits in Value required to represent all bits of the
          *    group’s invocations.  If none of the considered bits is set to
          *    1, the result is undefined."
          *
          * It has similar text for the other three.  This means that, in case
          * the subgroup size is less than 32, we have to mask off the unused
          * bits.  If the subgroup size is fixed and greater than or equal to
          * 32, the mask will be 0xffffffff and nir_opt_algebraic will delete
          * the iand.
          *
          * We only have to worry about this for BitCount and FindMSB because
          * FindLSB counts from the bottom and BitfieldExtract selects
          * individual bits.  In either case, if run outside the range of
          * valid bits, we hit the undefined results case and we can return
          * anything we want.
          */
         int_val = nir_iand(b, int_val, build_subgroup_mask(b, options));
      }

      switch (intrin->intrinsic) {
      case nir_intrinsic_ballot_bitfield_extract: {
         nir_def *idx = intrin->src[1].ssa;
         if (int_val->num_components > 1) {
            /* idx will be truncated by nir_ushr, so we just need to select
             * the right component using the bits of idx that are truncated
             * in the shift.
             */
            int_val =
               nir_vector_extract(b, int_val,
                                  nir_udiv_imm(b, idx, int_val->bit_size));
         }

         return nir_test_mask(b, nir_ushr(b, int_val, idx), 1);
      }
      case nir_intrinsic_ballot_bit_count_reduce:
         return vec_bit_count(b, int_val);
      case nir_intrinsic_ballot_find_lsb:
         return vec_find_lsb(b, int_val);
      case nir_intrinsic_ballot_find_msb:
         return vec_find_msb(b, int_val);
      default:
         unreachable("not a ballot bit-query intrinsic");
      }
   }
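
   /* An example of the component-select math above, assuming a uvec4
    * ballot (bit_size == 32) and idx == 70: nir_udiv_imm picks component
    * 70 / 32 == 2, and nir_ushr then shifts by 70 & 31 == 6, because NIR
    * shifts only consume log2(bit_size) bits of the shift count.
    */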

   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive: {
      nir_def *int_val = ballot_type_to_uint(b, intrin->src[0].ssa,
                                             options);
      if (options->lower_ballot_bit_count_to_mbcnt_amd) {
         nir_def *acc;
         if (intrin->intrinsic == nir_intrinsic_ballot_bit_count_exclusive) {
            acc = nir_imm_int(b, 0);
         } else {
            acc = nir_iand_imm(b, nir_u2u32(b, int_val), 0x1);
            int_val = nir_ushr_imm(b, int_val, 1);
         }
         return nir_mbcnt_amd(b, int_val, acc);
      }

      nir_def *mask;
      if (intrin->intrinsic == nir_intrinsic_ballot_bit_count_inclusive) {
         mask = nir_inot(b, build_subgroup_gt_mask(b, options));
      } else {
         mask = nir_inot(b, build_subgroup_ge_mask(b, options));
      }

      return vec_bit_count(b, nir_iand(b, int_val, mask));
   }
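
   /* The generic path above is a prefix popcount:
    *
    *    exclusive(id) = bit_count(ballot & lt_mask(id))
    *    inclusive(id) = bit_count(ballot & le_mask(id))
    *
    * mbcnt_amd(v, acc) yields acc + bit_count(v & lt_mask(id)), so the
    * inclusive form shifts the ballot right by one, turning the
    * strictly-below count into an at-or-below count, and seeds the
    * accumulator with the bit that was shifted out (invocation 0's bit).
    */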

   case nir_intrinsic_elect: {
      if (!options->lower_elect)
         return NULL;

      return nir_ieq(b, nir_load_subgroup_invocation(b), nir_first_invocation(b));
   }
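
   /* Note that elect compares against nir_first_invocation() rather than a
    * literal 0: under divergent control flow the lowest-numbered invocation
    * may be inactive, and elect must pick the first *active* one.
    */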

   case nir_intrinsic_shuffle:
      if (options->lower_shuffle &&
          (!options->lower_boolean_shuffle || intrin->src[0].ssa->bit_size != 1))
         return lower_shuffle(b, intrin);
      else if (options->lower_to_scalar && intrin->num_components > 1)
         return lower_subgroup_op_to_scalar(b, intrin);
      else if (options->lower_boolean_shuffle && intrin->src[0].ssa->bit_size == 1)
         return lower_boolean_shuffle(b, intrin, options);
      else if (options->lower_shuffle_to_32bit && intrin->src[0].ssa->bit_size == 64)
         return lower_subgroup_op_to_32bit(b, intrin);
      break;
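
   /* The bit_size guard on the first branch above makes the dispatch
    * unambiguous: a 1-bit shuffle must reach lower_boolean_shuffle() even
    * when lower_shuffle is also set, so the generic path steps aside for it.
    */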

   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
      if (options->lower_relative_shuffle &&
          (!options->lower_boolean_shuffle || intrin->src[0].ssa->bit_size != 1))
         return lower_to_shuffle(b, intrin, options);
      else if (options->lower_to_scalar && intrin->num_components > 1)
         return lower_subgroup_op_to_scalar(b, intrin);
      else if (options->lower_boolean_shuffle && intrin->src[0].ssa->bit_size == 1)
         return lower_boolean_shuffle(b, intrin, options);
      else if (options->lower_shuffle_to_32bit && intrin->src[0].ssa->bit_size == 64)
         return lower_subgroup_op_to_32bit(b, intrin);
      break;
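
   /* lower_to_shuffle() rewrites the relative variants as a plain shuffle
    * with a computed index.  A sketch of the usual mapping (the exact index
    * math lives in the helper earlier in this file):
    *
    *    shuffle_xor(v, d)  -> shuffle(v, id ^ d)
    *    shuffle_up(v, d)   -> shuffle(v, id - d)
    *    shuffle_down(v, d) -> shuffle(v, id + d)
    */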

   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
      if (options->lower_quad ||
          (options->lower_quad_broadcast_dynamic &&
           intrin->intrinsic == nir_intrinsic_quad_broadcast &&
           !nir_src_is_const(intrin->src[1])))
         return lower_dynamic_quad_broadcast(b, intrin, options);
      else if (options->lower_to_scalar && intrin->num_components > 1)
         return lower_subgroup_op_to_scalar(b, intrin);
      break;
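
   /* Within a quad (invocations 4k..4k+3) the swaps are fixed lane
    * permutations, so all four ops reduce to a broadcast-style index
    * computation (illustrative only):
    *
    *    horizontal: id ^ 1      vertical: id ^ 2      diagonal: id ^ 3
    *    broadcast(lane): (id & ~3) | lane
    */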

   case nir_intrinsic_quad_vote_any:
      if (options->lower_quad_vote)
         return build_quad_vote_any(b, intrin->src[0].ssa, options);
      break;
   case nir_intrinsic_quad_vote_all:
      if (options->lower_quad_vote) {
         nir_def *not_src = nir_inot(b, intrin->src[0].ssa);
         nir_def *any_not = build_quad_vote_any(b, not_src, options);
         return nir_inot(b, any_not);
      }
      break;
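
   /* quad_vote_all falls out of De Morgan's law: all(x) == !any(!x), so a
    * single build_quad_vote_any() helper covers both votes.
    */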

   case nir_intrinsic_reduce: {
      nir_def *ret = NULL;
      /* A cluster size greater than the subgroup size is implementation
       * defined, so it is safe to treat it as a whole-subgroup reduction
       * (cluster size 0).
       */
      if (options->subgroup_size &&
          nir_intrinsic_cluster_size(intrin) >= options->subgroup_size) {
         nir_intrinsic_set_cluster_size(intrin, 0);
         ret = NIR_LOWER_INSTR_PROGRESS;
      }
      if (nir_intrinsic_cluster_size(intrin) == 1)
         return intrin->src[0].ssa;
      if (options->lower_to_scalar && intrin->num_components > 1)
         return lower_subgroup_op_to_scalar(b, intrin);
      if (intrin->def.bit_size == 1 &&
          (options->lower_boolean_reduce || options->lower_reduce))
         return lower_boolean_reduce(b, intrin, options);
      if (options->lower_reduce)
         return lower_scan_reduce(b, intrin, options);
      return ret;
   }
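
   /* Cluster-size handling in short: size 0 (or anything at or above the
    * subgroup size) means "whole subgroup", and size-1 clusters reduce each
    * invocation with itself, so the operation is the identity and the
    * source is returned unchanged.
    */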

   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_exclusive_scan:
      if (options->lower_to_scalar && intrin->num_components > 1)
         return lower_subgroup_op_to_scalar(b, intrin);
      if (intrin->def.bit_size == 1 &&
          (options->lower_boolean_reduce || options->lower_reduce))
         return lower_boolean_reduce(b, intrin, options);
      if (options->lower_reduce)
         return lower_scan_reduce(b, intrin, options);
      break;
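
   /* For reference: exclusive_scan at invocation i combines the values of
    * invocations [0, i), while inclusive_scan covers [0, i]; the two differ
    * by one application of the op with the invocation's own value.
    */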

   case nir_intrinsic_rotate:
      if (options->lower_rotate_to_shuffle &&
          (!options->lower_boolean_shuffle || intrin->src[0].ssa->bit_size != 1))
         return lower_to_shuffle(b, intrin, options);
      else if (options->lower_to_scalar && intrin->num_components > 1)
         return lower_subgroup_op_to_scalar(b, intrin);
      else if (options->lower_boolean_shuffle && intrin->src[0].ssa->bit_size == 1)
         return lower_boolean_shuffle(b, intrin, options);
      else if (options->lower_shuffle_to_32bit && intrin->src[0].ssa->bit_size == 64)
         return lower_subgroup_op_to_32bit(b, intrin);
      break;
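
   /* As a shuffle, a rotate reads from ((id + delta) % width), where width
    * is the cluster size if one is given and the subgroup size otherwise
    * (a sketch; the exact index math lives in lower_to_shuffle()).
    */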

   case nir_intrinsic_masked_swizzle_amd:
      if (options->lower_to_scalar && intrin->num_components > 1) {
         return lower_subgroup_op_to_scalar(b, intrin);
      } else if (options->lower_shuffle_to_32bit && intrin->src[0].ssa->bit_size == 64) {
         return lower_subgroup_op_to_32bit(b, intrin);
      }
      break;
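
   /* masked_swizzle_amd permutes lanes roughly as
    * new_id = ((id & and_mask) | or_mask) ^ xor_mask within 32-lane groups,
    * so only scalarization and 64-bit splitting are needed here; the
    * swizzle itself stays native.
    */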

   default:
      break;
   }

   return NULL;
}

bool
nir_lower_subgroups(nir_shader *shader,
                    const nir_lower_subgroups_options *options)
{
   void *filter = options->filter ? options->filter : lower_subgroups_filter;
   return nir_shader_lower_instructions(shader, filter,
                                        lower_subgroups_instr,
                                        (void *)options);
}
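
/* Usage sketch (illustrative; the option values below are assumptions a
 * driver would replace with its real hardware limits):
 *
 *    const nir_lower_subgroups_options opts = {
 *       .subgroup_size = 32,
 *       .ballot_bit_size = 32,
 *       .ballot_components = 1,
 *       .lower_to_scalar = true,
 *       .lower_subgroup_masks = true,
 *       .lower_quad = true,
 *    };
 *    bool progress = nir_lower_subgroups(shader, &opts);
 */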