nir/algebraic: optimize pack_split(unpack(a).x, unpack(a).y) -> a

This is required to optimize FP64 and Int64 shaders generated by virglrenderer. virglrenderer emits pack/unpack around every 64-bit op, and NIR currently can't eliminate those pairs. This fixes that.

There is a new opcode constraint ".y" in search patterns, which means that the use of an instruction must have swizzle .y. This allows us to add patterns that have a Y swizzle on the results of instructions.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32172>
2026-01-06 11:00:11 +01:00 · 2024-11-17 08:45:48 -05:00 · 2024-11-17 08:45:48 -05:00 · 3800f0af41
commit 3800f0af41
parent b1bc691b0f
4 changed files with 19 additions and 5 deletions
--- a/src/compiler/nir/nir_algebraic.py
+++ b/src/compiler/nir/nir_algebraic.py
@ -208,6 +208,7 @@ class Value(object):
      ${'true' if val.nsz else 'false'},
      ${'true' if val.nnan else 'false'},
      ${'true' if val.ninf else 'false'},
+      ${'true' if val.swizzle_y else 'false'},
      ${val.c_opcode()},
      ${val.comm_expr_idx}, ${val.comm_exprs},
      { ${', '.join(src.array_index for src in val.sources)} },
@ -361,7 +362,7 @@ class Variable(Value):
      return '{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}'

 _opcode_re = re.compile(r"(?P<inexact>~)?(?P<exact>!)?(?P<opcode>\w+)(?:@(?P<bits>\d+))?"
-                        r"(?P<cond>\([^\)]+\))?")
+                        r"(?P<cond>\([^\)]+\))?(?P<swizzle_y>\.y)?")

 class Expression(Value):
   def __init__(self, expr, name_base, varset, algebraic_pass):
@ -391,6 +392,7 @@ class Expression(Value):
      self.nsz = cond.pop('nsz', False)
      self.nnan = cond.pop('nnan', False)
      self.ninf = cond.pop('ninf', False)
+      self.swizzle_y = m.group('swizzle_y') is not None

      assert len(cond) <= 1
      self.cond = cond.popitem()[0] if cond else None
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@ -1849,6 +1849,7 @@ for pack, bits, compbits in [('pack_64_2x32', 64, 32), ('pack_32_2x16', 32, 16)]
        ((unpack, (pack + '_split', a, b)), ('vec2', a, b)),
        ((unpack, (pack, a)), a),
        ((pack + '_split', (unpack + '_split_x', a), (unpack + '_split_y', a)), a),
+        ((pack + '_split', (unpack, a), (unpack + '.y', a)), a),
        ((pack, ('vec2', (unpack + '_split_x', a), (unpack + '_split_y', a))), a),
        ((pack, (unpack, a)), a),
    ]
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@ -398,11 +398,19 @@ match_expression(const nir_algebraic_table *table, const nir_search_expression *
    * expression, we don't have the information right now to propagate that
    * swizzle through.  We can only properly propagate swizzles if the
    * instruction is vectorized.
+    *
+    * The only exception is swizzle_y, for which we have a special condition,
+    * so that we can do pack_64_2x32_split(unpack(a).x, unpack(a).y) --> a.
    */
-   if (nir_op_infos[instr->op].output_size != 0) {
-      for (unsigned i = 0; i < num_components; i++) {
-         if (swizzle[i] != i)
-            return false;
+   if (expr->swizzle_y) {
+      if (num_components != 1 || swizzle[0] != 1)
+         return false;
+   } else {
+      if (nir_op_infos[instr->op].output_size != 0) {
+         for (unsigned i = 0; i < num_components; i++) {
+            if (swizzle[i] != i)
+               return false;
+         }
      }
   }

--- a/src/compiler/nir/nir_search.h
+++ b/src/compiler/nir/nir_search.h
@ -148,6 +148,9 @@ typedef struct {
   /** Replacement does not preserve infinities. */
   bool ninf : 1;

+   /** Only match when the use reads a single component with swizzle .y. */
+   bool swizzle_y : 1;
+
   /* One of nir_op or nir_search_op */
   uint16_t opcode : 13;