panvk: add support for {s,u}dot_4x8_{sat}

Generate the IDPADD instruction to support integer dot products. Support is added for both the signed and unsigned dot products, as well as their saturating variants. Support is only available on v9+. Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34812>
2026-01-07 13:00:21 +01:00 · 2025-05-05 13:59:22 +02:00 · 2025-05-05 13:59:22 +02:00 · ffdc08dfb6
commit ffdc08dfb6
parent dc1c701489
4 changed files with 22 additions and 7 deletions
--- a/src/panfrost/compiler/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost_compile.c
@ -3515,6 +3515,20 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
      bi_isub_u32_to(b, dst, bi_imm_u32(src_sz - 1), clz, false);
      break;
   }
+   case nir_op_udot_4x8_uadd_sat:
+   case nir_op_udot_4x8_uadd: {
+      assert(b->shader->arch >= 9);
+      bi_idpadd_v4u8_to(b, dst, s0, s1, s2,
+                        instr->op == nir_op_udot_4x8_uadd_sat);
+      break;
+   }
+   case nir_op_sdot_4x8_iadd_sat:
+   case nir_op_sdot_4x8_iadd: {
+      assert(b->shader->arch >= 9);
+      bi_idpadd_v4s8_to(b, dst, s0, s1, s2,
+                        instr->op == nir_op_sdot_4x8_iadd_sat);
+      break;
+   }

   default:
      fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[instr->op].name);
--- a/src/panfrost/compiler/bifrost_compile.h
+++ b/src/panfrost/compiler/bifrost_compile.h
@ -148,6 +148,10 @@ void bifrost_compile_shader_nir(nir_shader *nir,
      .support_indirect_inputs = (uint8_t)BITFIELD_MASK(PIPE_SHADER_TYPES),    \
      .lower_hadd = arch >= 11,                                                \
      .discard_is_demote = true,                                               \
+      .has_udot_4x8 = arch >= 9,                                               \
+      .has_udot_4x8_sat = arch >= 9,                                           \
+      .has_sdot_4x8 = arch >= 9,                                               \
+      .has_sdot_4x8_sat = arch >= 9,                                           \
   };

 DEFINE_OPTIONS(6);
--- a/src/panfrost/compiler/valhall/ISA.xml
+++ b/src/panfrost/compiler/valhall/ISA.xml
@ -2235,21 +2235,20 @@
    <src absneg="true">Z coordinate as 32-bit floating point</src>
  </ins>

-  <group name="IDP" title="8-bit dot product" dests="1" opcode="0xC2" unused="true" unit="FMA">
+  <group name="IDPADD" title="8-bit dot product and accumulate" dests="1" opcode="0xC2" unit="FMA">
    <desc>
      8-bit integer dot product between 4 channel vectors, intended for machine
      learning. Available in both unsigned and signed variants, controlling
      sign-extension/zero-extension behaviour to the final 32-bit destination.
      Saturation is available. Corresponds to the `cl_arm_integer_dot_product_*`
-      family of OpenCL extensions. Not for actual use, just for completeness.
-      Instead, use your platform's neural accelerator.
+      family of OpenCL extensions.

      For $A, B \in \{ 0, \ldots, 255 \}^4$ and $\text{Accumulator} \in
      \mathbb{Z}$, calculates $(A \cdot B) + \text{Accumulator}$ and optionally
      saturates.
    </desc>
-    <ins name="IDP.v4s8" opcode2="0"/>
-    <ins name="IDP.v4u8" opcode2="1"/>
+    <ins name="IDPADD.v4s8" opcode2="0"/>
+    <ins name="IDPADD.v4u8" opcode2="1"/>
    <src>A</src>
    <src>B</src>
    <src>Accumulator</src>
--- a/src/panfrost/compiler/valhall/valhall.c.py
+++ b/src/panfrost/compiler/valhall/valhall.c.py
@ -88,8 +88,6 @@ SKIP = set([
        "NOT_OLD.i64",

        # TODO
-        "IDP.v4s8",
-        "IDP.v4u8",
        "FATAN_ASSIST.f32",
        "SEG_ADD.u64",
        "TEX_DUAL",