aco/assembler: support VOP3P with DPP

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22698>
2026-05-08 04:48:08 +02:00 · 2023-04-23 10:42:58 +02:00 · 2023-04-23 10:42:58 +02:00 · 2548f28ab3
commit 2548f28ab3
parent d0e73cb313
3 changed files with 42 additions and 46 deletions
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@ -765,10 +765,45 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
         unreachable("Pseudo instructions should be lowered before assembly.");
      break;
   default:
-      /* TODO: VOP3/VOP3P can use DPP8/16 on GFX11 (encoding of src0 and DPP8/16 word seems same
-       * except abs/neg is ignored). src2 cannot be literal and src0/src1 must be VGPR.
-       */
-      if (instr->isVOP3() && !instr->isDPP()) {
+      if (instr->isDPP16()) {
+         assert(ctx.gfx_level >= GFX8);
+         DPP16_instruction& dpp = instr->dpp16();
+
+         /* first emit the instruction without the DPP operand */
+         Operand dpp_op = instr->operands[0];
+         instr->operands[0] = Operand(PhysReg{250}, v1);
+         instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16);
+         emit_instruction(ctx, out, instr);
+         uint32_t encoding = (0xF & dpp.row_mask) << 28;
+         encoding |= (0xF & dpp.bank_mask) << 24;
+         encoding |= dpp.abs[1] << 23;
+         encoding |= dpp.neg[1] << 22;
+         encoding |= dpp.abs[0] << 21;
+         encoding |= dpp.neg[0] << 20;
+         if (ctx.gfx_level >= GFX10)
+            encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */
+         encoding |= dpp.bound_ctrl << 19;
+         encoding |= dpp.dpp_ctrl << 8;
+         encoding |= reg(ctx, dpp_op, 8);
+         encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;
+         out.push_back(encoding);
+         return;
+      } else if (instr->isDPP8()) {
+         assert(ctx.gfx_level >= GFX10);
+         DPP8_instruction& dpp = instr->dpp8();
+
+         /* first emit the instruction without the DPP operand */
+         Operand dpp_op = instr->operands[0];
+         instr->operands[0] = Operand(PhysReg{234}, v1);
+         instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8);
+         emit_instruction(ctx, out, instr);
+         uint32_t encoding = reg(ctx, dpp_op, 8);
+         encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;
+         for (unsigned i = 0; i < 8; ++i)
+            encoding |= dpp.lane_sel[i] << (8 + i * 3);
+         out.push_back(encoding);
+         return;
+      } else if (instr->isVOP3()) {
         VALU_instruction& vop3 = instr->valu();

         if (instr->isVOP2()) {
@ -855,45 +890,6 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
         for (unsigned i = 0; i < 3; i++)
            encoding |= vop3.neg_lo[i] << (29 + i);
         out.push_back(encoding);
-
-      } else if (instr->isDPP16()) {
-         assert(ctx.gfx_level >= GFX8);
-         DPP16_instruction& dpp = instr->dpp16();
-
-         /* first emit the instruction without the DPP operand */
-         Operand dpp_op = instr->operands[0];
-         instr->operands[0] = Operand(PhysReg{250}, v1);
-         instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16);
-         emit_instruction(ctx, out, instr);
-         uint32_t encoding = (0xF & dpp.row_mask) << 28;
-         encoding |= (0xF & dpp.bank_mask) << 24;
-         encoding |= dpp.abs[1] << 23;
-         encoding |= dpp.neg[1] << 22;
-         encoding |= dpp.abs[0] << 21;
-         encoding |= dpp.neg[0] << 20;
-         if (ctx.gfx_level >= GFX10)
-            encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */
-         encoding |= dpp.bound_ctrl << 19;
-         encoding |= dpp.dpp_ctrl << 8;
-         encoding |= reg(ctx, dpp_op, 8);
-         encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;
-         out.push_back(encoding);
-         return;
-      } else if (instr->isDPP8()) {
-         assert(ctx.gfx_level >= GFX10);
-         DPP8_instruction& dpp = instr->dpp8();
-
-         /* first emit the instruction without the DPP operand */
-         Operand dpp_op = instr->operands[0];
-         instr->operands[0] = Operand(PhysReg{234}, v1);
-         instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8);
-         emit_instruction(ctx, out, instr);
-         uint32_t encoding = reg(ctx, dpp_op, 8);
-         encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0;
-         for (unsigned i = 0; i < 8; ++i)
-            encoding |= dpp.lane_sel[i] << (8 + i * 3);
-         out.push_back(encoding);
-         return;
      } else if (instr->isSDWA()) {
         assert(ctx.gfx_level >= GFX8 && ctx.gfx_level < GFX11);
         SDWA_instruction& sdwa = instr->sdwa();
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@ -98,8 +98,8 @@ enum class Format : std::uint16_t {
   PSEUDO_REDUCTION = 19,

   /* Vector ALU Formats */
-   VOP3P = 20,
   VINTERP_INREG = 21,
+   VOP3P = 1 << 7,
   VOP1 = 1 << 8,
   VOP2 = 1 << 9,
   VOPC = 1 << 10,
@ -1248,7 +1248,7 @@ struct Instruction {
      return *(Pseudo_reduction_instruction*)this;
   }
   constexpr bool isReduction() const noexcept { return format == Format::PSEUDO_REDUCTION; }
-   constexpr bool isVOP3P() const noexcept { return format == Format::VOP3P; }
+   constexpr bool isVOP3P() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3P; }
   VINTERP_inreg_instruction& vinterp_inreg() noexcept
   {
      assert(isVINTERP_INREG());
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@ -69,8 +69,8 @@ class Format(Enum):
   PSEUDO_BRANCH = 17
   PSEUDO_BARRIER = 18
   PSEUDO_REDUCTION = 19
-   VOP3P = 20
   VINTERP_INREG = 21
+   VOP3P = 1 << 7
   VOP1 = 1 << 8
   VOP2 = 1 << 9
   VOPC = 1 << 10