From c30c383d4d53735533b9fed1a5d57ce56058f5c0 Mon Sep 17 00:00:00 2001
From: Emma Anholt <emma@anholt.net>
Date: Tue, 13 Jan 2026 18:01:52 -0800
Subject: [PATCH] nir/opt_algebraic_tests: Allow testing of fdot*_replicated
 opcodes.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39369>
---
 src/compiler/nir/nir_algebraic.py             | 11 +++--
 src/compiler/nir/nir_opt_algebraic.py         | 22 +++++-----
 .../nir/tests/nir_algebraic_pattern_test.cpp  |  4 ++
 .../nir/tests/nir_algebraic_pattern_test.h    | 41 +++++++++++++++++++
 4 files changed, 64 insertions(+), 14 deletions(-)

diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py
index b2fb06d6ea6..cfd7c1ace6b 100644
--- a/src/compiler/nir/nir_algebraic.py
+++ b/src/compiler/nir/nir_algebraic.py
@@ -1299,7 +1299,14 @@ TEST_F(${pass_name}_pattern_test, ${test_name})
 % for xform_def in xform_defs:
    ${xform_def}
 % endfor
-   nir_unit_test_assert_eq(b, ${search_def}, ${replace_def});
+<%
+   # Note that fdot_replicated replacements will generate more channels than the search
+   # side, and that's OK -- nir_opt_algebraic allows that in patterns.  But
+   # nir_unit_test_assert_eq wants equality.
+%>
+   unsigned mask = BITFIELD_MASK(MIN2(${search_def}->num_components, ${replace_def}->num_components));
+   nir_unit_test_assert_eq(b, nir_channels(b, ${search_def}, mask),
+                              nir_channels(b, ${replace_def}, mask));
 % for cond in expr_conds:
    ${cond}
 % endfor
@@ -1341,8 +1348,6 @@ def expression_is_unsupported(expr):
     broken_opcodes = [
         # medium precision means that the compiler can do whatever it wants which makes it unsuitable for testing.
         "f2fmp", "i2imp", "f2imp", "f2ump", "i2fmp", "u2fmp",
-        # _replicated OPs do not have nir_builder functions.
-        "fdot2_replicated", "fdot3_replicated", "fdot4_replicated", "fdph_replicated",
     ]
 
     if expr.opcode in broken_opcodes:
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 233db6496e0..9c3040c736f 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -3846,10 +3846,10 @@ late_optimizations.extend([
 
    (('fsqrt', ('fsat(is_used_once)', 'a(cannot_add_output_modifier)')), ('fsat', ('fsqrt', a))),
 
-   (('fdot2', a, b), ('fdot2_replicated', a, b), 'options->fdot_replicates', TestStatus.UNSUPPORTED),
-   (('fdot3', a, b), ('fdot3_replicated', a, b), 'options->fdot_replicates', TestStatus.UNSUPPORTED),
-   (('fdot4', a, b), ('fdot4_replicated', a, b), 'options->fdot_replicates', TestStatus.UNSUPPORTED),
-   (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates', TestStatus.UNSUPPORTED),
+   (('fdot2', a, b), ('fdot2_replicated', a, b), 'options->fdot_replicates'),
+   (('fdot3', a, b), ('fdot3_replicated', a, b), 'options->fdot_replicates'),
+   (('fdot4', a, b), ('fdot4_replicated', a, b), 'options->fdot_replicates'),
+   (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
 
    (('~flrp', ('fadd(is_used_once)', a, b), ('fadd(is_used_once)', a, c), d), ('fadd', ('flrp', b, c, d), a)),
 
@@ -4145,9 +4145,9 @@ distribute_src_mods = [
    # Try to remove some spurious negations rather than pushing them down.
    (('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)),
    (('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)),
-   (('fdot2_replicated', ('fneg', a), ('fneg', b)), ('fdot2_replicated', a, b), 'true', TestStatus.UNSUPPORTED),
-   (('fdot3_replicated', ('fneg', a), ('fneg', b)), ('fdot3_replicated', a, b), 'true', TestStatus.UNSUPPORTED),
-   (('fdot4_replicated', ('fneg', a), ('fneg', b)), ('fdot4_replicated', a, b), 'true', TestStatus.UNSUPPORTED),
+   (('fdot2_replicated', ('fneg', a), ('fneg', b)), ('fdot2_replicated', a, b)),
+   (('fdot3_replicated', ('fneg', a), ('fneg', b)), ('fdot3_replicated', a, b)),
+   (('fdot4_replicated', ('fneg', a), ('fneg', b)), ('fdot4_replicated', a, b)),
    (('fneg(is_only_used_as_float)', ('fneg', a)), a),
 
    (('fneg', ('fmul(is_used_once)', a, b)), ('fmul', ('fneg', a), b)),
@@ -4162,13 +4162,13 @@ distribute_src_mods = [
    (('fneg', ('fmin(is_used_once)', a, b)), ('fmax', ('fneg', a), ('fneg', b))),
    (('fneg', ('fmax(is_used_once)', a, b)), ('fmin', ('fneg', a), ('fneg', b))),
 
-   (('fneg', ('fdot2_replicated(is_used_once)', a, b)), ('fdot2_replicated', ('fneg', a), b), 'true', TestStatus.UNSUPPORTED),
-   (('fneg', ('fdot3_replicated(is_used_once)', a, b)), ('fdot3_replicated', ('fneg', a), b), 'true', TestStatus.UNSUPPORTED),
-   (('fneg', ('fdot4_replicated(is_used_once)', a, b)), ('fdot4_replicated', ('fneg', a), b), 'true', TestStatus.UNSUPPORTED),
+   (('fneg', ('fdot2_replicated(is_used_once)', a, b)), ('fdot2_replicated', ('fneg', a), b), 'true', TestStatus.XFAIL), # -fdot2(-1, 0) replacement produces 0 instead of -0.
+   (('fneg', ('fdot3_replicated(is_used_once)', a, b)), ('fdot3_replicated', ('fneg', a), b), 'true', TestStatus.XFAIL),
+   (('fneg', ('fdot4_replicated(is_used_once)', a, b)), ('fdot4_replicated', ('fneg', a), b), 'true', TestStatus.XFAIL),
 
    # fdph works mostly like fdot, but to get the correct result, the negation
    # must be applied to the second source.
-   (('fneg', ('fdph_replicated(is_used_once)', a, b)), ('fdph_replicated', a, ('fneg', b)), 'true', TestStatus.UNSUPPORTED),
+   (('fneg', ('fdph_replicated(is_used_once)', a, b)), ('fdph_replicated', a, ('fneg', b)), 'true', TestStatus.XFAIL),
 
    (('fneg', ('fsign(is_used_once)', a)), ('fsign', ('fneg', a))),
    (('fabs', ('fsign(is_used_once)', a)), ('fsign', ('fabs', a))),
diff --git a/src/compiler/nir/tests/nir_algebraic_pattern_test.cpp b/src/compiler/nir/tests/nir_algebraic_pattern_test.cpp
index bd24e080a08..f5916c5db0d 100644
--- a/src/compiler/nir/tests/nir_algebraic_pattern_test.cpp
+++ b/src/compiler/nir/tests/nir_algebraic_pattern_test.cpp
@@ -287,6 +287,10 @@ evaluate_expression(nir_algebraic_pattern_test *test, nir_instr *instr)
          assert(intrinsic->src[0].ssa->bit_size == intrinsic->src[1].ssa->bit_size);
          uint32_t bit_size = intrinsic->src[0].ssa->bit_size;
 
+         /* Note: fdot*_replicated replacements generate more channels than the
+          * original pattern, but we care that the usable channels of the search
+          * expression match.
+          */
          assert(intrinsic->src[0].ssa->num_components == intrinsic->src[1].ssa->num_components);
          uint32_t num_components = intrinsic->src[0].ssa->num_components;
 
diff --git a/src/compiler/nir/tests/nir_algebraic_pattern_test.h b/src/compiler/nir/tests/nir_algebraic_pattern_test.h
index 407a1797f0c..76a7b41beb6 100644
--- a/src/compiler/nir/tests/nir_algebraic_pattern_test.h
+++ b/src/compiler/nir/tests/nir_algebraic_pattern_test.h
@@ -71,4 +71,45 @@ class nir_algebraic_pattern_test : public nir_test {
    std::vector<nir_const_value> tmp_values;
 };
 
+/* Builders that aren't auto-generated for nir_builder.h, due to not
+ * having a defined dest size (3 or 4 components, independent of src args).
+ * Just pick 4 and get some coverage.
+ */
+static inline nir_def *
+nir_fdot_replicated(nir_builder *b, nir_op op, nir_def *x, nir_def *y)
+{
+   nir_alu_instr *alu = nir_alu_instr_create(b->shader, op);
+   alu->src[0].src = nir_src_for_ssa(x);
+   alu->src[1].src = nir_src_for_ssa(y);
+   alu->fp_math_ctrl = b->fp_math_ctrl;
+   nir_def_init(&alu->instr, &alu->def, 4, x->bit_size);
+   nir_builder_instr_insert(b, &alu->instr);
+
+   return &alu->def;
+}
+
+static inline nir_def *
+nir_fdot2_replicated(nir_builder *b, nir_def *x, nir_def *y)
+{
+   return nir_fdot_replicated(b, nir_op_fdot2_replicated, x, y);
+}
+
+static inline nir_def *
+nir_fdot3_replicated(nir_builder *b, nir_def *x, nir_def *y)
+{
+   return nir_fdot_replicated(b, nir_op_fdot3_replicated, x, y);
+}
+
+static inline nir_def *
+nir_fdot4_replicated(nir_builder *b, nir_def *x, nir_def *y)
+{
+   return nir_fdot_replicated(b, nir_op_fdot4_replicated, x, y);
+}
+
+static inline nir_def *
+nir_fdph_replicated(nir_builder *b, nir_def *x, nir_def *y)
+{
+   return nir_fdot_replicated(b, nir_op_fdph_replicated, x, y);
+}
+
 #endif