nir: Add a pass to generate BFI instructions from logical operations

Inspired by a commit message in !30934, I set about optimizing the code generated for nir_copysign. It would be possible to just implement an opt_algebraic pattern for the specific values used by nir_copysign, but this casts a slightly larger net. As noted in a comment in the code, there may be variations of the pattern that this pass misses. The opt_algebraic pattern would miss them too. v2: Use nir_def_replace. Suggested by Alyssa. Allow more "root" instruction types. Suggested by Georg. v3: Treat extract_u16(x, 0) as (x & 0x0000ffff), and treat extract_u8(x, 0) as (x & 0x000000ff). v4: Use nir_scalar. Suggested by Georg. Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31006>
2026-01-06 17:30:20 +01:00 · 2024-09-03 11:41:37 -07:00 · 2024-09-03 11:41:37 -07:00 · 6a09d33549
commit 6a09d33549
parent 057c7c9f53
3 changed files with 163 additions and 0 deletions
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@ -231,6 +231,7 @@ files_libnir = files(
  'nir_opt_find_array_copies.c',
  'nir_opt_fragdepth.c',
  'nir_opt_gcm.c',
+  'nir_opt_generate_bfi.c',
  'nir_opt_idiv_const.c',
  'nir_opt_if.c',
  'nir_opt_intrinsics.c',
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@ -6701,6 +6701,8 @@ bool nir_opt_fragdepth(nir_shader *shader);

 bool nir_opt_gcm(nir_shader *shader, bool value_number);

+bool nir_opt_generate_bfi(nir_shader *shader);
+
 bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);

 bool nir_opt_mqsad(nir_shader *shader);
--- a/src/compiler/nir/nir_opt_generate_bfi.c
+++ b/src/compiler/nir/nir_opt_generate_bfi.c
@ -0,0 +1,160 @@
+/*
+ * Copyright 2024 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+/**
+ * \file
+ * Identify sequences of logical operations to convert to bfi
+ *
+ * It is difficult for opt_algebraic to match general expressions like
+ *
+ *    (a & some_constant) | (b & ~some_constant)
+ *
+ * Common cases like some_constant = 0x7fffffff can be added, but this may
+ * miss other opportunities. This pass implements that general pattern
+ * matching.
+ *
+ * Either nir_op_bfi or nir_op_bitfield_select may be generated by this pass.
+ *
+ * Future work may also detect cases like:
+ *
+ *    (a & some_constant) | ~(b | some_constant)
+ *    ~((a | some_constant) & (b | ~some_constant))
+ *    etc.
+ */
+
+#include "nir_builder.h"
+
+/**
+ * Decompose \p alu into a (value, constant mask) pair when it computes
+ * "value & mask".
+ *
+ * Two shapes are recognized:
+ *  - iand with exactly one constant source, and
+ *  - extract_u16 / extract_u8 of element 0, treated as an iand with
+ *    0x0000ffff / 0x000000ff respectively.
+ *
+ * On success the non-constant operand is stored in \p value, the constant in
+ * \p mask, and true is returned. Otherwise false is returned and the outputs
+ * are left untouched.
+ *
+ * \p alu is queried with nir_scalar_alu_op unconditionally, so the caller is
+ * expected to have checked nir_scalar_is_alu first.
+ */
+static bool
+parse_iand(nir_scalar alu, nir_scalar *value, uint32_t *mask)
+{
+   if (nir_scalar_alu_op(alu) == nir_op_iand) {
+      nir_scalar left = nir_scalar_chase_alu_src(alu, 0);
+      nir_scalar right = nir_scalar_chase_alu_src(alu, 1);
+      const bool left_is_const = nir_scalar_is_const(left);
+
+      /* Exactly one source must be constant. If both sources are constants,
+       * do not perform the conversion. There are lowerings in opt_algebraic
+       * that can generate this pattern on platforms that set has_bfi and
+       * avoid_ternary_with_two_constants. Undoing that lowering would result
+       * in infinite optimization loops.
+       */
+      if (left_is_const == nir_scalar_is_const(right))
+         return false;
+
+      *mask = nir_scalar_as_uint(left_is_const ? left : right);
+      *value = left_is_const ? right : left;
+      return true;
+   }
+
+   if (nir_scalar_alu_op(alu) != nir_op_extract_u16 &&
+       nir_scalar_alu_op(alu) != nir_op_extract_u8)
+      return false;
+
+   /* There may be leftovers from opt_algebraic that haven't been constant
+    * folded yet. Leave an extract of a constant alone.
+    */
+   nir_scalar src = nir_scalar_chase_alu_src(alu, 0);
+   if (nir_scalar_is_const(src))
+      return false;
+
+   /* Only element 0 is treated as a plain mask of the low bits. Make sure
+    * the index really is a constant before reading its value.
+    */
+   nir_scalar index = nir_scalar_chase_alu_src(alu, 1);
+   if (!nir_scalar_is_const(index) || nir_scalar_as_uint(index) != 0)
+      return false;
+
+   *mask = nir_scalar_alu_op(alu) == nir_op_extract_u16 ? 0x0000ffff : 0x000000ff;
+   *value = src;
+   return true;
+}
+
+/**
+ * Per-instruction callback handed to nir_shader_alu_pass. Rewrites
+ *
+ *    (a & mask) | (b & ~mask)
+ *
+ * into a single bfi (when the shader options set has_bfi) or bitfield_select.
+ *
+ * Only scalar, 32-bit ior / ixor / iadd roots whose two sources are both
+ * accepted by parse_iand with complementary masks are converted.
+ *
+ * \return true when \p alu was replaced, false when it was left untouched.
+ */
+static bool
+nir_opt_generate_bfi_instr(nir_builder *b,
+                           nir_alu_instr *alu,
+                           UNUSED void *cb_data)
+{
+   /* Since none of the source bits will overlap, these are equivalent. */
+   const bool is_root_op = alu->op == nir_op_ior ||
+                           alu->op == nir_op_ixor ||
+                           alu->op == nir_op_iadd;
+
+   if (!is_root_op || alu->def.num_components != 1 || alu->def.bit_size != 32)
+      return false;
+
+   nir_scalar root = nir_get_scalar(&alu->def, 0);
+   nir_scalar left = nir_scalar_chase_alu_src(root, 0);
+   nir_scalar right = nir_scalar_chase_alu_src(root, 1);
+
+   if (!nir_scalar_is_alu(left) || !nir_scalar_is_alu(right))
+      return false;
+
+   nir_scalar src1;
+   nir_scalar src2;
+   uint32_t mask1;
+   uint32_t mask2;
+
+   if (!parse_iand(left, &src1, &mask1) || !parse_iand(right, &src2, &mask2))
+      return false;
+
+   /* Every bit must be taken from exactly one of the two values. */
+   if (mask1 != ~mask2)
+      return false;
+
+   /* The mask used by the bfi instruction must be odd. When the mask is odd,
+    * the implicit shift applied by the bfi is by zero bits. Since the masks
+    * are complements of each other, exactly one of them is odd, so the rule
+    * can always be applied.
+    *
+    * bitfield_select does not have this restriction, but it doesn't hurt.
+    */
+   const bool first_is_odd = (mask1 & 1) != 0;
+
+   /* Because mask1 == ~mask2. */
+   assert(first_is_odd != ((mask2 & 1) != 0));
+
+   const uint32_t mask = first_is_odd ? mask1 : mask2;
+   const nir_scalar insert = first_is_odd ? src1 : src2;
+   const nir_scalar base = first_is_odd ? src2 : src1;
+
+   b->cursor = nir_before_instr(&alu->instr);
+
+   /* Build the operands in separate statements. C does not specify the order
+    * in which function arguments are evaluated, so creating them inside the
+    * argument list would let the compiler choose the order in which any
+    * instructions they emit are inserted.
+    */
+   nir_def *mask_def = nir_imm_int(b, mask);
+   nir_def *insert_def = nir_channel(b, insert.def, insert.comp);
+   nir_def *base_def = nir_channel(b, base.def, base.comp);
+
+   nir_def *bfi;
+
+   if (b->shader->options->has_bfi) {
+      bfi = nir_bfi(b, mask_def, insert_def, base_def);
+   } else {
+      assert(b->shader->options->has_bitfield_select);
+
+      bfi = nir_bitfield_select(b, mask_def, insert_def, base_def);
+   }
+
+   nir_def_replace(&alu->def, bfi);
+   return true;
+}
+
+/**
+ * Public entry point of the pass: hands nir_opt_generate_bfi_instr to
+ * nir_shader_alu_pass for \p shader and returns that call's result.
+ *
+ * Returns false without touching the shader when the compiler options set
+ * neither has_bfi nor has_bitfield_select, since the callback would have no
+ * opcode to emit.
+ */
+bool
+nir_opt_generate_bfi(nir_shader *shader)
+{
+   const bool can_emit = shader->options->has_bfi ||
+                         shader->options->has_bitfield_select;
+
+   if (!can_emit)
+      return false;
+
+   return nir_shader_alu_pass(shader, nir_opt_generate_bfi_instr,
+                              nir_metadata_control_flow, NULL);
+}