aco/assembler/gfx11: simplify 16bit VOP12C promotion to VOP3

With the shared struct for modifies, this is can be a lot cleaner now. Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21761>
2026-05-04 22:49:13 +02:00 · 2023-02-03 13:28:32 +01:00 · 2023-02-03 13:28:32 +01:00 · 57557e8815
commit 57557e8815
parent ae50b66251
1 changed files with 37 additions and 76 deletions
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@ -100,7 +100,7 @@ reg(asm_context& ctx, Definition def, unsigned width = 32)
 }

 bool
-needs_vop3_gfx11(asm_context& ctx, Instruction* instr, Operand *dpp_op)
+needs_vop3_gfx11(asm_context& ctx, Instruction* instr)
 {
   if (ctx.gfx_level <= GFX10_3)
      return false;
@ -110,8 +110,6 @@ needs_vop3_gfx11(asm_context& ctx, Instruction* instr, Operand *dpp_op)
      return false;

   u_foreach_bit (i, mask & 0x3) {
-      if (i == 0 && dpp_op && dpp_op->physReg().reg() >= (256 + 128))
-         return true;
      if (instr->operands[i].physReg().reg() >= (256 + 128))
         return true;
   }
@ -121,8 +119,7 @@ needs_vop3_gfx11(asm_context& ctx, Instruction* instr, Operand *dpp_op)
 }

 void
-emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr,
-                 Operand *dpp_op_ptr = NULL, DPP16_instruction *dpp16_ptr = NULL)
+emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
 {
   /* lower remaining pseudo-instructions */
   if (instr->opcode == aco_opcode::p_constaddr_getpc) {
@ -140,6 +137,20 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
      instr->operands[1] = Operand::literal32(instr->operands[1].constantValue());
   }

+   /* Promote VOP12C to VOP3 if necessary. */
+   if ((instr->isVOP1() || instr->isVOP2() || instr->isVOPC()) && !instr->isVOP3() &&
+       needs_vop3_gfx11(ctx, instr)) {
+      instr->format = asVOP3(instr->format);
+      if (instr->opcode == aco_opcode::v_fmaak_f16) {
+         instr->opcode = aco_opcode::v_fma_f16;
+         instr->format = (Format)((uint32_t)instr->format & ~(uint32_t)Format::VOP2);
+      } else if (instr->opcode == aco_opcode::v_fmamk_f16) {
+         std::swap(instr->operands[1], instr->operands[2]);
+         instr->opcode = aco_opcode::v_fma_f16;
+         instr->format = (Format)((uint32_t)instr->format & ~(uint32_t)Format::VOP2);
+      }
+   }
+
   uint32_t opcode = ctx.opcode[(int)instr->opcode];
   if (opcode == (uint32_t)-1) {
      char* outmem;
@ -321,80 +332,30 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
      return;
   }
   case Format::VOP2: {
-      if (needs_vop3_gfx11(ctx, instr, dpp_op_ptr)) {
-         if (instr->opcode == aco_opcode::v_fmaak_f16) {
-            opcode = ctx.opcode[(int)aco_opcode::v_fma_f16];
-         } else if (instr->opcode == aco_opcode::v_fmamk_f16) {
-            std::swap(instr->operands[1], instr->operands[2]);
-            opcode = ctx.opcode[(int)aco_opcode::v_fma_f16];
-         } else {
-            opcode += 0x100;
-         }
-
-         uint32_t encoding = (0b110101 << 26);
-         encoding |= opcode << 16;
-         encoding |= reg(ctx, instr->definitions[0], 8);
-         encoding |= dpp16_ptr ? (dpp16_ptr->abs[0] << 8) | (dpp16_ptr->abs[1] << 9) : 0;
-         out.push_back(encoding);
-
-         encoding = reg(ctx, instr->operands[0]);
-         encoding |= reg(ctx, instr->operands[1]) << 9;
-         if (instr->opcode == aco_opcode::v_fmaak_f16 ||
-             instr->opcode == aco_opcode::v_fmamk_f16)
-            encoding |= reg(ctx, instr->operands[2]) << 18;
-         encoding |= dpp16_ptr ? (dpp16_ptr->neg[0] << 29) | (dpp16_ptr->neg[1] << 30) : 0;
-         out.push_back(encoding);
-      } else {
-         uint32_t encoding = 0;
-         encoding |= opcode << 25;
-         encoding |= reg(ctx, instr->definitions[0], 8) << 17;
-         encoding |= reg(ctx, instr->operands[1], 8) << 9;
-         encoding |= reg(ctx, instr->operands[0]);
-         out.push_back(encoding);
-      }
+      uint32_t encoding = 0;
+      encoding |= opcode << 25;
+      encoding |= reg(ctx, instr->definitions[0], 8) << 17;
+      encoding |= reg(ctx, instr->operands[1], 8) << 9;
+      encoding |= reg(ctx, instr->operands[0]);
+      out.push_back(encoding);
      break;
   }
   case Format::VOP1: {
-      if (needs_vop3_gfx11(ctx, instr, dpp_op_ptr)) {
-         uint32_t encoding = (0b110101 << 26);
-         encoding |= (opcode + 0x180) << 16;
-         encoding |= reg(ctx, instr->definitions[0], 8);
-         encoding |= dpp16_ptr ? dpp16_ptr->abs[0] << 8 : 0;
-         out.push_back(encoding);
-
-         encoding = reg(ctx, instr->operands[0]);
-         encoding |= dpp16_ptr ? dpp16_ptr->neg[0] << 29 : 0;
-         out.push_back(encoding);
-      } else {
-         uint32_t encoding = (0b0111111 << 25);
-         if (!instr->definitions.empty())
-            encoding |= reg(ctx, instr->definitions[0], 8) << 17;
-         encoding |= opcode << 9;
-         if (!instr->operands.empty())
-            encoding |= reg(ctx, instr->operands[0]);
-         out.push_back(encoding);
-      }
+      uint32_t encoding = (0b0111111 << 25);
+      if (!instr->definitions.empty())
+         encoding |= reg(ctx, instr->definitions[0], 8) << 17;
+      encoding |= opcode << 9;
+      if (!instr->operands.empty())
+         encoding |= reg(ctx, instr->operands[0]);
+      out.push_back(encoding);
      break;
   }
   case Format::VOPC: {
-      if (needs_vop3_gfx11(ctx, instr, dpp_op_ptr)) {
-         uint32_t encoding = (0b110101 << 26);
-         encoding |= opcode << 16;
-         encoding |= reg(ctx, instr->definitions[0], 8);
-         encoding |= dpp16_ptr ? (dpp16_ptr->abs[0] << 8) | (dpp16_ptr->abs[1] << 9) : 0;
-         out.push_back(encoding);
-
-         encoding = reg(ctx, instr->operands[0]);
-         encoding |= reg(ctx, instr->operands[1]) << 9;
-         encoding |= dpp16_ptr ? (dpp16_ptr->neg[0] << 29) | (dpp16_ptr->neg[1] << 30) : 0;
-         out.push_back(encoding);
-      } else {
-         uint32_t encoding = (0b0111110 << 25);
-         encoding |= opcode << 17;
-         encoding |= reg(ctx, instr->operands[1], 8) << 9;
-         encoding |= reg(ctx, instr->operands[0]);
-         out.push_back(encoding);
-      }
+      uint32_t encoding = (0b0111110 << 25);
+      encoding |= opcode << 17;
+      encoding |= reg(ctx, instr->operands[1], 8) << 9;
+      encoding |= reg(ctx, instr->operands[0]);
+      out.push_back(encoding);
      break;
   }
   case Format::VINTRP: {
@ -779,7 +740,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
      /* TODO: VOP3/VOP3P can use DPP8/16 on GFX11 (encoding of src0 and DPP8/16 word seems same
       * except abs/neg is ignored). src2 cannot be literal and src0/src1 must be VGPR.
       */
-      if (instr->isVOP3()) {
+      if (instr->isVOP3() && !instr->isDPP()) {
         VALU_instruction& vop3 = instr->valu();

         if (instr->isVOP2()) {
@ -875,7 +836,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
         Operand dpp_op = instr->operands[0];
         instr->operands[0] = Operand(PhysReg{250}, v1);
         instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16);
-         emit_instruction(ctx, out, instr, &dpp_op, &dpp);
+         emit_instruction(ctx, out, instr);
         uint32_t encoding = (0xF & dpp.row_mask) << 28;
         encoding |= (0xF & dpp.bank_mask) << 24;
         encoding |= dpp.abs[1] << 23;
@ -897,7 +858,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
         Operand dpp_op = instr->operands[0];
         instr->operands[0] = Operand(PhysReg{234}, v1);
         instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8);
-         emit_instruction(ctx, out, instr, &dpp_op);
+         emit_instruction(ctx, out, instr);
         uint32_t encoding = reg(ctx, dpp_op, 8);
         for (unsigned i = 0; i < 8; ++i)
            encoding |= dpp.lane_sel[i] << (8 + i * 3);