Merge branch 'shift-and' into 'main'

pan/bi: Add support for RSHIFT_AND

See merge request mesa/mesa!38976
2025-12-20 07:20:10 +01:00 · 2025-12-20 01:47:13 +01:00 · 2025-12-20 01:47:13 +01:00 · 6ab69d8954
commit 6ab69d8954
parent c430f394c5 671fcfeb4b
3 changed files with 21 additions and 0 deletions
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@ -1511,6 +1511,10 @@ if (!isnormal(dst))
   dst = copysignf(0.0f, src0);
 """)

+opcode("ushr_and_pan", 0, tuint, [0, 0, 0], [tuint, tuint, tuint], False, "",
+       "(src0 >> (src1 & (sizeof(src0) * 8 - 1))) & src2",
+       description = "Unsigned right-shift followed by a bitwise AND." + shift_note)
+
 # vc4-specific opcodes

 # Saturated vector add for 4 8bit ints.
--- a/src/panfrost/compiler/bifrost/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost/bifrost_compile.c
@ -3926,6 +3926,10 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
                        instr->op == nir_op_sdot_4x8_iadd_sat);
      break;
   }
+   case nir_op_ushr_and_pan: {
+      bi_rshift_and_to(b, sz, dst, s0, s2, bi_byte(s1, 0), false);
+      break;
+   }

   default:
      fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[instr->op].name);
@ -5849,6 +5853,9 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, nir_variable_mode robust2_mode
      late_algebraic |= late_algebraic_progress;
   }

+   /* Vectorize instructions generated by bifrost_nir_lower_algebraic_late */
+   NIR_PASS(_, nir, nir_opt_vectorize, bi_vectorize_filter, &gpu_id);
+
   while (late_algebraic) {
      late_algebraic = false;
      NIR_PASS(late_algebraic, nir, nir_opt_algebraic_late);
@ -5858,6 +5865,9 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, nir_variable_mode robust2_mode
      NIR_PASS(_, nir, nir_opt_cse);
   }

+   /* Scalarize unsupported vectors generated by nir_opt_algebraic_late */
+   NIR_PASS(_, nir, nir_lower_alu_width, bi_vectorize_filter, &gpu_id);
+
   NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
   NIR_PASS(_, nir, nir_opt_dce);

--- a/src/panfrost/compiler/bifrost/bifrost_nir_algebraic.py
+++ b/src/panfrost/compiler/bifrost/bifrost_nir_algebraic.py
@ -104,6 +104,13 @@ algebraic_late = [
    # On v11+, because FROUND.v2f16 is gone we end up with precision issues.
    # We lower ffract here instead to ensure lower_bit_size has been performed.
    (('ffract', a), ('fadd', a, ('fneg', ('ffloor', a))), 'gpu_arch >= 11'),
+
+    # Fuse SHIFT and bitwise AND
+    (('iand', ('ushr', 'a@32', 'b@32'), 'c@32'), ('ushr_and_pan', a, b, c)),
+    (('iand', ('ushr', 'a@16', 'b@32'), 'c@16'), ('ushr_and_pan', a, ('u2u16', b), c)),
+    (('iand', ('extract_u8', 'a@16', 1), 15), ('ushr_and_pan', a, 8, 15)),
+    (('iand', 'a@16', 15), ('ushr_and_pan', a, 0, 15)),
+    (('ushr', 'a@16', 12), ('ushr_and_pan', a, 12, 15)),
 ]

 # nir_lower_bool_to_bitsize can generate needless conversions.