nir: remove duplicate alu channels in nir_opt_shrink_vectors

This will clean code like: vec3 32 ssa_8 = frcp ssa_7.www vec3 32 ssa_9 = fmul ssa_7.xyz, ssa_8 into vec1 32 ssa_8 = frcp ssa_7.w vec3 32 ssa_9 = fmul ssa_7.xyz, ssa_8.xxx This helps the r300 driver because we can only do a single channel for math ops at a time, so the first version would result in three frcp instructions. The nir_opt_shrink_vectors comments even claim the pass should be doing this; however, it actually does it only for nir_op_vecx instructions, so extend this to generic alu instructions. RV530 shader-db: total instructions in shared programs: 135032 -> 133707 (-0.98%) instructions in affected programs: 46121 -> 44796 (-2.87%) helped: 452 HURT: 26 total temps in shared programs: 17051 -> 17033 (-0.11%) temps in affected programs: 1509 -> 1491 (-1.19%) helped: 91 HURT: 30 12.02->12.08 (+0.5%) fps gain in Unigine Sanctuary (n=5) with RV530 Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7051 Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com> Reviewed-by: Gert Wollny <gert.wollny@collabora.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20213>
2026-02-22 22:50:32 +01:00 · 2022-12-04 17:34:37 +01:00 · 2022-12-04 17:34:37 +01:00 · cb7f201288
commit cb7f201288
parent 980df9ede1
1 changed files with 39 additions and 28 deletions
--- a/src/compiler/nir/nir_opt_shrink_vectors.c
+++ b/src/compiler/nir/nir_opt_shrink_vectors.c
@ -192,49 +192,60 @@ opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr)
      return false;

   unsigned mask = nir_ssa_def_components_read(def);
-   unsigned last_bit = util_last_bit(mask);
-   unsigned num_components = util_bitcount(mask);
-
-   unsigned rounded = round_up_components(num_components);
-   assert(rounded <= def->num_components);
-   num_components = rounded;
-
   /* return, if there is nothing to do */
-   if (mask == 0 || num_components == def->num_components)
+   if (mask == 0)
      return false;

-   const bool is_bitfield_mask = last_bit == num_components;
-   if (is_bitfield_mask) {
-      /* just reduce the number of components and return */
-      def->num_components = num_components;
-      instr->dest.write_mask = mask;
-      return true;
-   }
-
   uint8_t reswizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };
-   unsigned index = 0;
-   for (unsigned i = 0; i < last_bit; i++) {
+   unsigned num_components = 0;
+   bool progress = false;
+   for (unsigned i = 0; i < def->num_components; i++) {
      /* skip unused components */
      if (!((mask >> i) & 0x1))
         continue;

-      /* reswizzle the sources */
-      for (int k = 0; k < nir_op_infos[instr->op].num_inputs; k++) {
-         instr->src[k].swizzle[index] = instr->src[k].swizzle[i];
-         reswizzle[i] = index;
+      /* Try reuse a component with the same swizzles */
+      unsigned j;
+      for (j = 0; j < num_components; j++) {
+         bool duplicate_channel = true;
+         for (unsigned k = 0; k < nir_op_infos[instr->op].num_inputs; k++) {
+            if (nir_op_infos[instr->op].input_sizes[k] != 0 ||
+                instr->src[k].swizzle[i] != instr->src[k].swizzle[j]) {
+               duplicate_channel = false;
+               break;
+            }
+         }
+
+         if (duplicate_channel) {
+            reswizzle[i] = j;
+            progress = true;
+            break;
+         }
+      }
+
+      /* Otherwise, just append the value */
+      if (j == num_components) {
+         for (int k = 0; k < nir_op_infos[instr->op].num_inputs; k++) {
+            instr->src[k].swizzle[num_components] = instr->src[k].swizzle[i];
+         }
+         if (i != num_components)
+            progress = true;
+         reswizzle[i] = num_components++;
      }
-      index++;
   }
-   assert(index == num_components);
+
+   unsigned rounded = round_up_components(num_components);
+   assert(rounded <= def->num_components);

   /* update dest */
-   def->num_components = num_components;
-   instr->dest.write_mask = BITFIELD_MASK(num_components);
+   def->num_components = rounded;
+   instr->dest.write_mask = BITFIELD_MASK(rounded);

   /* update uses */
-   reswizzle_alu_uses(def, reswizzle);
+   if (progress)
+      reswizzle_alu_uses(def, reswizzle);

-   return true;
+   return progress;
 }

 static bool