nir/opt_large_constants: optimize small vector constant arrays

Foz-DB Navi48: Totals from 2956 (2.58% of 114655) affected shaders: MaxWaves: 85080 -> 85110 (+0.04%) Instrs: 5167735 -> 5170572 (+0.05%); split: -0.12%, +0.17% CodeSize: 28882716 -> 28867340 (-0.05%); split: -0.14%, +0.08% VGPRs: 164484 -> 164616 (+0.08%); split: -0.09%, +0.18% SpillSGPRs: 612 -> 611 (-0.16%) Latency: 35017837 -> 34391146 (-1.79%); split: -1.80%, +0.01% InvThroughput: 6336245 -> 6323807 (-0.20%); split: -0.49%, +0.29% VClause: 112504 -> 111117 (-1.23%); split: -1.32%, +0.09% SClause: 121125 -> 117618 (-2.90%); split: -3.04%, +0.15% Copies: 392203 -> 384977 (-1.84%); split: -1.88%, +0.04% Branches: 155578 -> 155376 (-0.13%); split: -0.13%, +0.01% PreSGPRs: 127654 -> 127205 (-0.35%); split: -0.39%, +0.04% PreVGPRs: 112486 -> 112449 (-0.03%); split: -0.04%, +0.00% VALU: 2577362 -> 2586379 (+0.35%); split: -0.00%, +0.35% SALU: 889569 -> 888472 (-0.12%); split: -1.01%, +0.89% VMEM: 167203 -> 165750 (-0.87%) SMEM: 190438 -> 187313 (-1.64%) VOPD: 194411 -> 194344 (-0.03%); split: +0.01%, -0.04% Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33002>
2026-05-07 09:18:04 +02:00 · 2026-03-18 13:42:48 +01:00 · 2026-03-18 13:42:48 +01:00 · a9f3efcae0
commit a9f3efcae0
parent f782524c36
2 changed files with 97 additions and 64 deletions
--- a/src/compiler/nir/nir_opt_large_constants.c
+++ b/src/compiler/nir/nir_opt_large_constants.c
@ -134,7 +134,8 @@ struct var_info {
   uint32_t constant_data_size;
   void *constant_data;

-   struct small_constant small_constant;
+   uint32_t num_components;
+   struct small_constant small_constant[NIR_MAX_VEC_COMPONENTS];
 };

 static int
@ -221,34 +222,10 @@ handle_constant_store(void *mem_ctx, struct var_info *info,
                      bit_size);
 }

-static void
-get_small_constant(struct var_info *info, glsl_type_size_align_func size_align)
+static bool
+get_small_constant_component(struct small_constant *info, uint32_t array_len,
+                             uint32_t bit_size, nir_const_value *values)
 {
-   if (!glsl_type_is_array(info->var->type))
-      return;
-
-   const struct glsl_type *elem_type = glsl_get_array_element(info->var->type);
-   if (!glsl_type_is_scalar(elem_type))
-      return;
-
-   uint32_t array_len = glsl_get_length(info->var->type);
-   uint32_t bit_size = glsl_get_bit_size(elem_type);
-
-   /* If our array is large, don't even bother */
-   if (array_len > 64)
-      return;
-
-   /* Skip cases that can be lowered to a bcsel ladder more efficiently. */
-   if (array_len <= 3)
-      return;
-
-   uint32_t elem_size, elem_align;
-   size_align(elem_type, &elem_size, &elem_align);
-   uint32_t stride = ALIGN_POT(elem_size, elem_align);
-
-   nir_const_value values[64];
-   read_const_values(values, info->constant_data, array_len, bit_size, stride);
-
   bool is_float = true;
   if (bit_size < 16) {
      is_float = false;
@ -282,53 +259,100 @@ get_small_constant(struct var_info *info, glsl_type_size_align_func size_align)
   used_bits = util_next_power_of_two(used_bits);

   if (used_bits * array_len > 64)
-      return;
-
-   info->is_small = true;
+      return false;

   for (unsigned i = 0; i < array_len; i++) {
      uint64_t u64_elem = is_float ? nir_const_value_as_float(values[i], bit_size)
                                   : nir_const_value_as_uint(values[i], bit_size);

-      info->small_constant.data |= u64_elem << (i * used_bits);
+      info->data |= u64_elem << (i * used_bits);
   }

   /* Limit bit_size >= 32 to avoid unnecessary conversions.  */
-   info->small_constant.bit_size =
+   info->bit_size =
      MAX2(util_next_power_of_two(used_bits * array_len), 32);
-   info->small_constant.is_float = is_float;
-   info->small_constant.bit_stride = used_bits;
+   info->is_float = is_float;
+   info->bit_stride = used_bits;
+   return true;
+}
+
+static void
+get_small_constant(struct var_info *info, glsl_type_size_align_func size_align)
+{
+   if (!glsl_type_is_array(info->var->type))
+      return;
+
+   const struct glsl_type *elem_type = glsl_get_array_element(info->var->type);
+   if (!glsl_type_is_scalar(elem_type) && !glsl_type_is_vector(elem_type))
+      return;
+
+   uint32_t array_len = glsl_get_length(info->var->type);
+   info->num_components = glsl_get_vector_elements(elem_type);
+   uint32_t bit_size = glsl_get_bit_size(elem_type);
+
+   /* If our array is large, don't even bother */
+   if (array_len > 64)
+      return;
+
+   /* Skip cases that can be lowered to a bcsel ladder more efficiently. */
+   if (array_len <= 3)
+      return;
+
+   uint32_t elem_size, elem_align;
+   size_align(elem_type, &elem_size, &elem_align);
+   const uint32_t stride = ALIGN_POT(elem_size, elem_align);
+
+   const uint32_t scalar_stride = bit_size == 1 ? 4 : bit_size / 8;
+
+   info->is_small = true;
+   for (unsigned c = 0; c < info->num_components; c++) {
+      nir_const_value values[64];
+      const void *data = info->constant_data;
+      data = (void *)(((uintptr_t)data) + scalar_stride * c);
+      read_const_values(values, data, array_len, bit_size, stride);
+
+      if (!get_small_constant_component(&info->small_constant[c], array_len,
+                                        bit_size, values)) {
+         info->is_small = false;
+         break;
+      }
+   }
 }

 static nir_def *
 build_small_constant_load(nir_builder *b, nir_deref_instr *deref,
                          struct var_info *info, glsl_type_size_align_func size_align)
 {
-   struct small_constant *constant = &info->small_constant;
-
-   nir_def *imm = nir_imm_intN_t(b, constant->data, constant->bit_size);
-
   assert(deref->deref_type == nir_deref_type_array);
-   nir_def *index = deref->arr.index.ssa;
+   nir_def *index = nir_u2u32(b, deref->arr.index.ssa);

-   nir_def *shift = nir_imul_imm(b, index, constant->bit_stride);
+   nir_def *ret[NIR_MAX_VEC_COMPONENTS];

-   nir_def *ret = nir_ushr(b, imm, nir_u2u32(b, shift));
-   ret = nir_iand_imm(b, ret, BITFIELD64_MASK(constant->bit_stride));
+   for (unsigned c = 0; c < info->num_components; c++) {
+      const struct small_constant *constant = &info->small_constant[c];
+      nir_def *imm = nir_imm_intN_t(b, constant->data, constant->bit_size);

-   const unsigned bit_size = glsl_get_bit_size(deref->type);
-   if (bit_size < 8) {
-      /* Booleans are special-cased to be 32-bit */
-      assert(glsl_type_is_boolean(deref->type));
-      ret = nir_ine_imm(b, ret, 0);
-   } else {
-      if (constant->is_float)
-         ret = nir_u2fN(b, ret, bit_size);
-      else if (bit_size != constant->bit_size)
-         ret = nir_u2uN(b, ret, bit_size);
+      nir_def *shift = nir_imul_imm(b, index, constant->bit_stride);
+
+      ret[c] = nir_ushr(b, imm, shift);
+      ret[c] = nir_iand_imm(b, ret[c], BITFIELD64_MASK(constant->bit_stride));
+
+      const unsigned bit_size = glsl_get_bit_size(deref->type);
+      if (bit_size < 8) {
+         /* Booleans are special-cased to be 32-bit */
+         assert(glsl_type_is_boolean(deref->type));
+         ret[c] = nir_ine_imm(b, ret[c], 0);
+      } else {
+         if (constant->is_float)
+            ret[c] = nir_u2fN(b, ret[c], bit_size);
+         else if (bit_size != constant->bit_size)
+            ret[c] = nir_u2uN(b, ret[c], bit_size);
+      }
   }

-   return ret;
+   if (info->num_components == 1)
+      return ret[0];
+   return nir_vec(b, ret, info->num_components);
 }

 /** Lower large constant variables to shader constant data
--- a/src/compiler/nir/tests/opt_large_constants_tests.cpp
+++ b/src/compiler/nir/tests/opt_large_constants_tests.cpp
@ -231,19 +231,28 @@ TEST_F(nir_large_constants_test, small_float_natural_numbers_including_zero_vec_
      workgroup_size: 1, 1, 1
      max_subgroup_size: 128
      min_subgroup_size: 1
-      constants: 64
      decl_function main () (entrypoint)

      impl main {
-          block b0:  // preds:
-          32    %0 = @load_workgroup_index
-          32    %1 = load_const (0x00000000)
-          32    %2 = load_const (0x00000003)
-          32    %3 = ishl %0, %2 (0x3)
-          32    %4 = iadd %1 (0x0), %3
-          32x2  %5 = @load_constant (%4) (base=0, range=64, access=none, align_mul=4, align_offset=0)
-                     @use (%5)
-                     // succs: b1
+          block b0:   // preds:
+          32     %0 = @load_workgroup_index
+          32     %1 = load_const (0x76543210 = 1985229328)
+          32     %2 = load_const (0x00000002)
+          32     %3 = ishl %0, %2 (0x2)
+          32     %4 = ushr %1 (0x76543210), %3
+          32     %5 = load_const (0x0000000f = 15)
+          32     %6 = iand %4, %5 (0xf)
+          32     %7 = u2f32 %6 // exact, preserve:sz
+          32     %8 = load_const (0x01234567 = 19088743)
+          32     %9 = load_const (0x00000002)
+          32    %10 = ishl %0, %9 (0x2)
+          32    %11 = ushr %8 (0x1234567), %10
+          32    %12 = load_const (0x0000000f = 15)
+          32    %13 = iand %11, %12 (0xf)
+          32    %14 = u2f32 %13 // exact, preserve:sz
+          32x2  %15 = vec2 %7, %14
+                      @use (%15)
+                      // succs: b1
          block b1:
      }
   )"));