From c30c383d4d53735533b9fed1a5d57ce56058f5c0 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Tue, 13 Jan 2026 18:01:52 -0800 Subject: [PATCH] nir/opt_algebraic_tests: Allow testing of fdot*_replicated opcodes. Part-of: --- src/compiler/nir/nir_algebraic.py | 11 +++-- src/compiler/nir/nir_opt_algebraic.py | 22 +++++----- .../nir/tests/nir_algebraic_pattern_test.cpp | 4 ++ .../nir/tests/nir_algebraic_pattern_test.h | 41 +++++++++++++++++++ 4 files changed, 64 insertions(+), 14 deletions(-) diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py index b2fb06d6ea6..cfd7c1ace6b 100644 --- a/src/compiler/nir/nir_algebraic.py +++ b/src/compiler/nir/nir_algebraic.py @@ -1299,7 +1299,14 @@ TEST_F(${pass_name}_pattern_test, ${test_name}) % for xform_def in xform_defs: ${xform_def} % endfor - nir_unit_test_assert_eq(b, ${search_def}, ${replace_def}); +<% + # Note that fdot_replicated replacements will generate more channels than the search + # side, and that's OK -- nir_opt_algebraic allows that in patterns. But + # nir_unit_test_assert_eq wants equality. +%> + unsigned mask = BITFIELD_MASK(MIN2(${search_def}->num_components, ${replace_def}->num_components)); + nir_unit_test_assert_eq(b, nir_channels(b, ${search_def}, mask), + nir_channels(b, ${replace_def}, mask)); % for cond in expr_conds: ${cond} % endfor @@ -1341,8 +1348,6 @@ def expression_is_unsupported(expr): broken_opcodes = [ # medium precision means that the compiler can do whatever it wants which makes it unsuitable for testing. "f2fmp", "i2imp", "f2imp", "f2ump", "i2fmp", "u2fmp", - # _replicated OPs do not have nir_builder functions. 
- "fdot2_replicated", "fdot3_replicated", "fdot4_replicated", "fdph_replicated", ] if expr.opcode in broken_opcodes: diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 233db6496e0..9c3040c736f 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -3846,10 +3846,10 @@ late_optimizations.extend([ (('fsqrt', ('fsat(is_used_once)', 'a(cannot_add_output_modifier)')), ('fsat', ('fsqrt', a))), - (('fdot2', a, b), ('fdot2_replicated', a, b), 'options->fdot_replicates', TestStatus.UNSUPPORTED), - (('fdot3', a, b), ('fdot3_replicated', a, b), 'options->fdot_replicates', TestStatus.UNSUPPORTED), - (('fdot4', a, b), ('fdot4_replicated', a, b), 'options->fdot_replicates', TestStatus.UNSUPPORTED), - (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates', TestStatus.UNSUPPORTED), + (('fdot2', a, b), ('fdot2_replicated', a, b), 'options->fdot_replicates'), + (('fdot3', a, b), ('fdot3_replicated', a, b), 'options->fdot_replicates'), + (('fdot4', a, b), ('fdot4_replicated', a, b), 'options->fdot_replicates'), + (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'), (('~flrp', ('fadd(is_used_once)', a, b), ('fadd(is_used_once)', a, c), d), ('fadd', ('flrp', b, c, d), a)), @@ -4145,9 +4145,9 @@ distribute_src_mods = [ # Try to remove some spurious negations rather than pushing them down. 
(('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)), (('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)), - (('fdot2_replicated', ('fneg', a), ('fneg', b)), ('fdot2_replicated', a, b), 'true', TestStatus.UNSUPPORTED), - (('fdot3_replicated', ('fneg', a), ('fneg', b)), ('fdot3_replicated', a, b), 'true', TestStatus.UNSUPPORTED), - (('fdot4_replicated', ('fneg', a), ('fneg', b)), ('fdot4_replicated', a, b), 'true', TestStatus.UNSUPPORTED), + (('fdot2_replicated', ('fneg', a), ('fneg', b)), ('fdot2_replicated', a, b)), + (('fdot3_replicated', ('fneg', a), ('fneg', b)), ('fdot3_replicated', a, b)), + (('fdot4_replicated', ('fneg', a), ('fneg', b)), ('fdot4_replicated', a, b)), (('fneg(is_only_used_as_float)', ('fneg', a)), a), (('fneg', ('fmul(is_used_once)', a, b)), ('fmul', ('fneg', a), b)), @@ -4162,13 +4162,13 @@ distribute_src_mods = [ (('fneg', ('fmin(is_used_once)', a, b)), ('fmax', ('fneg', a), ('fneg', b))), (('fneg', ('fmax(is_used_once)', a, b)), ('fmin', ('fneg', a), ('fneg', b))), - (('fneg', ('fdot2_replicated(is_used_once)', a, b)), ('fdot2_replicated', ('fneg', a), b), 'true', TestStatus.UNSUPPORTED), - (('fneg', ('fdot3_replicated(is_used_once)', a, b)), ('fdot3_replicated', ('fneg', a), b), 'true', TestStatus.UNSUPPORTED), - (('fneg', ('fdot4_replicated(is_used_once)', a, b)), ('fdot4_replicated', ('fneg', a), b), 'true', TestStatus.UNSUPPORTED), + (('fneg', ('fdot2_replicated(is_used_once)', a, b)), ('fdot2_replicated', ('fneg', a), b), 'true', TestStatus.XFAIL), # -fdot2(-1, 0) replacement produces 0 instead of -0. + (('fneg', ('fdot3_replicated(is_used_once)', a, b)), ('fdot3_replicated', ('fneg', a), b), 'true', TestStatus.XFAIL), + (('fneg', ('fdot4_replicated(is_used_once)', a, b)), ('fdot4_replicated', ('fneg', a), b), 'true', TestStatus.XFAIL), # fdph works mostly like fdot, but to get the correct result, the negation # must be applied to the second source. 
- (('fneg', ('fdph_replicated(is_used_once)', a, b)), ('fdph_replicated', a, ('fneg', b)), 'true', TestStatus.UNSUPPORTED), + (('fneg', ('fdph_replicated(is_used_once)', a, b)), ('fdph_replicated', a, ('fneg', b)), 'true', TestStatus.XFAIL), (('fneg', ('fsign(is_used_once)', a)), ('fsign', ('fneg', a))), (('fabs', ('fsign(is_used_once)', a)), ('fsign', ('fabs', a))), diff --git a/src/compiler/nir/tests/nir_algebraic_pattern_test.cpp b/src/compiler/nir/tests/nir_algebraic_pattern_test.cpp index bd24e080a08..f5916c5db0d 100644 --- a/src/compiler/nir/tests/nir_algebraic_pattern_test.cpp +++ b/src/compiler/nir/tests/nir_algebraic_pattern_test.cpp @@ -287,6 +287,10 @@ evaluate_expression(nir_algebraic_pattern_test *test, nir_instr *instr) assert(intrinsic->src[0].ssa->bit_size == intrinsic->src[1].ssa->bit_size); uint32_t bit_size = intrinsic->src[0].ssa->bit_size; + /* Note: fdot*_replicated replacements generate more channels than the + * original pattern, but we care that the usable channels of the search + * expression match. + */ assert(intrinsic->src[0].ssa->num_components == intrinsic->src[1].ssa->num_components); uint32_t num_components = intrinsic->src[0].ssa->num_components; diff --git a/src/compiler/nir/tests/nir_algebraic_pattern_test.h b/src/compiler/nir/tests/nir_algebraic_pattern_test.h index 407a1797f0c..76a7b41beb6 100644 --- a/src/compiler/nir/tests/nir_algebraic_pattern_test.h +++ b/src/compiler/nir/tests/nir_algebraic_pattern_test.h @@ -71,4 +71,45 @@ class nir_algebraic_pattern_test : public nir_test { std::vector tmp_values; }; +/* Builders that aren't auto-generated for nir_builder.h, due to not + * having a defined dest size (3 or 4 components, independent of src args). + * Just pick 4 and get some coverage. 
+ */ +static inline nir_def * +nir_fdot_replicated(nir_builder *b, nir_op op, nir_def *x, nir_def *y) +{ + nir_alu_instr *alu = nir_alu_instr_create(b->shader, op); + alu->src[0].src = nir_src_for_ssa(x); + alu->src[1].src = nir_src_for_ssa(y); + alu->fp_math_ctrl = b->fp_math_ctrl; + nir_def_init(&alu->instr, &alu->def, 4, x->bit_size); + nir_builder_instr_insert(b, &alu->instr); + + return &alu->def; +} + +static inline nir_def * +nir_fdot2_replicated(nir_builder *b, nir_def *x, nir_def *y) +{ + return nir_fdot_replicated(b, nir_op_fdot2_replicated, x, y); +} + +static inline nir_def * +nir_fdot3_replicated(nir_builder *b, nir_def *x, nir_def *y) +{ + return nir_fdot_replicated(b, nir_op_fdot3_replicated, x, y); +} + +static inline nir_def * +nir_fdot4_replicated(nir_builder *b, nir_def *x, nir_def *y) +{ + return nir_fdot_replicated(b, nir_op_fdot4_replicated, x, y); +} + +static inline nir_def * +nir_fdph_replicated(nir_builder *b, nir_def *x, nir_def *y) +{ + return nir_fdot_replicated(b, nir_op_fdph_replicated, x, y); +} + #endif