brw: Add BRW_TYPE_BF validation

Reviewed-by: Rohan Garg <rohan.garg@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33664>
2025-12-21 22:20:14 +01:00 · 2024-09-17 09:41:58 -07:00 · 2024-09-17 09:41:58 -07:00 · 62323a934b
commit 62323a934b
parent 9916cc1050
3 changed files with 317 additions and 16 deletions
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@ -425,8 +425,9 @@ execution_type_for_type(enum brw_reg_type type)
   case BRW_TYPE_V:
   case BRW_TYPE_UV:
      return BRW_TYPE_W;
   default:
-      unreachable("invalid type");
+      return BRW_TYPE_INVALID;
   }
 }
@ -564,6 +565,43 @@ is_mixed_float(const brw_hw_decoded_inst *inst)
          types_are_mixed_float(src1_type, dst_type);
 }
 static bool
 is_pure_bfloat(const brw_hw_decoded_inst *inst)
 {
   if (inst_is_send(inst))
      return false;
   if (inst->num_sources == 0 && !inst->has_dst)
      return false;
   for (int i = 0; i < inst->num_sources; i++) {
      if (!brw_type_is_bfloat(inst->src[i].type))
         return false;
   }
   if (inst->has_dst && !brw_type_is_bfloat(inst->dst.type))
      return false;
   return true;
 }
 static bool
 is_mixed_bfloat(const brw_hw_decoded_inst *inst)
 {
   if (inst_is_send(inst))
      return false;
   const int operands = inst->num_sources + inst->has_dst;
   int bfloat = 0;
   for (int i = 0; i < inst->num_sources; i++)
      bfloat += brw_type_is_bfloat(inst->src[i].type);
   if (inst->has_dst)
      bfloat += brw_type_is_bfloat(inst->dst.type);
   return bfloat > 0 && bfloat != operands;
 }
 /**
 * Returns whether an instruction is an explicit or implicit conversion
 * to/from byte.
@ -624,6 +662,10 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
   enum brw_reg_type dst_type = inst->dst.type;
   ERROR_IF(brw_type_is_bfloat(dst_type) &&
            !devinfo->has_bfloat16,
            "Bfloat destination, but platform does not support it");
   ERROR_IF(dst_type == BRW_TYPE_DF &&
            !devinfo->has_64bit_float,
            "64-bit float destination, but platform does not support it");
@ -636,6 +678,10 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
   for (unsigned s = 0; s < inst->num_sources; s++) {
      enum brw_reg_type src_type = inst->src[s].type;
      ERROR_IF(brw_type_is_bfloat(src_type) &&
               !devinfo->has_bfloat16,
               "Bfloat source, but platform does not support it");
      ERROR_IF(src_type == BRW_TYPE_DF &&
               !devinfo->has_64bit_float,
               "64-bit float source, but platform does not support it");
@ -842,7 +888,7 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
    * override the general rule for the ratio of sizes of the destination type
    * and the execution type. We will add validation for those in a later patch.
    */
-   bool validate_dst_size_and_exec_size_ratio = !is_mixed_float(inst);
+   bool validate_dst_size_and_exec_size_ratio = !is_mixed_float(inst) && !is_mixed_bfloat(inst);
   if (validate_dst_size_and_exec_size_ratio &&
       exec_type_size > dst_type_size) {
@ -1016,6 +1062,21 @@ general_restrictions_on_region_parameters(const struct brw_isa_info *isa,
   return error_msg;
 }
 static bool
 is_multiplier_instruction(const brw_hw_decoded_inst *inst)
 {
   /* TODO: Complete this list. */
   switch (inst->opcode) {
   case BRW_OPCODE_MUL:
   case BRW_OPCODE_MAC:
   case BRW_OPCODE_MACH:
   case BRW_OPCODE_MAD:
      return true;
   default:
      return false;
   }
 }
 static struct string
 special_restrictions_for_mixed_float_mode(const struct brw_isa_info *isa,
                                          const brw_hw_decoded_inst *inst)
@ -1024,6 +1085,60 @@ special_restrictions_for_mixed_float_mode(const struct brw_isa_info *isa,
   struct string error_msg = { .str = NULL, .len = 0 };
   ERROR_IF(is_pure_bfloat(inst),
            "Instructions with pure bfloat16 operands are not supported.");
   if (is_mixed_bfloat(inst)) {
      ERROR_IF(devinfo->ver < 20 && inst->exec_size > 8,
               "Execution size must not be greater than 8 in Gfx12.");
      ERROR_IF(devinfo->ver >= 20 && inst->exec_size > 16,
               "Execution size must not be greater than 8 in Gfx20+.");
      for (int i = 0; i < inst->num_sources; i++) {
         ERROR_IF(brw_type_is_bfloat(inst->src[i].type) &&
                  src_has_scalar_region(inst, i),
                  "Broadcast of bfloat16 scalar is not supported.");
      }
      if (is_multiplier_instruction(inst)) {
         if (inst->num_sources == 2) {
            ERROR_IF(brw_type_is_bfloat(inst->src[1].type),
                     "Bfloat16 not allowed in Src1 of 2-source instructions involving multiplier.");
         } else if (inst->num_sources == 3) {
            ERROR_IF(brw_type_is_bfloat(inst->src[2].type),
                     "Bfloat16 not allowed in Src2 of 3-source instructions involving multiplier.");
         }
      }
      const unsigned half_offset = REG_SIZE * reg_unit(devinfo) / 2;
      if (inst->has_dst && brw_type_is_bfloat(inst->dst.type)) {
         unsigned dst_stride = inst->dst.hstride;
         bool dst_is_packed = is_packed(inst->exec_size * dst_stride, inst->exec_size, dst_stride);
         if (dst_is_packed) {
            ERROR_IF(inst->dst.subnr != 0 && inst->dst.subnr != half_offset,
                     "Packed bfloat16 destination must have register offset 0 or half of GRF register.");
         } else {
            /* Offset in the restriction text is in terms of elements. */
            const unsigned elem_size = brw_type_size_bytes(inst->dst.type);
            ERROR_IF(dst_stride != 2 || (inst->dst.subnr != 0 &&
                                         inst->dst.subnr != 1 * elem_size),
                     "Unpacked bfloat16 destination must have stride 2 and register offset 0 or 1.");
         }
      }
      for (int i = 0; i < inst->num_sources; i++) {
         if (brw_type_is_bfloat(inst->src[i].type)) {
            bool src_is_packed = is_packed(inst->src[i].vstride, inst->src[i].width, inst->src[i].hstride);
            ERROR_IF(!src_is_packed,
                     "Bfloat16 source must be packed");
            ERROR_IF(inst->src[i].subnr != 0 && inst->src[i].subnr != half_offset,
                     "Bfloat16 source must have register offset 0 or half of GRF register.");
         }
      }
   }
   const unsigned opcode = inst->opcode;
   if (inst->num_sources >= 3)
      return error_msg;
@ -1640,13 +1755,18 @@ special_requirements_for_handling_double_precision_data_types(
       *
       * "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float and
       *  Quad-Word data must not be used."
       *
       * and
       *
       * "Vx1 and VxH indirect addressing for BFloat16 (...) data
       *  must not be used."
       */
      if (devinfo->verx10 >= 125 &&
-          (brw_type_is_float(type) || brw_type_size_bytes(type) == 8)) {
+          (brw_type_is_float_or_bfloat(type) || brw_type_size_bytes(type) == 8)) {
         ERROR_IF(address_mode == BRW_ADDRESS_REGISTER_INDIRECT_REGISTER &&
                  vstride == BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL,
                  "Vx1 and VxH indirect addressing for Float, Half-Float, "
-                  "Double-Float and Quad-Word data must not be used");
+                  "Double-Float, Quad-Word, and Bfloat16 data must not be used");
      }
   }
@ -2442,6 +2562,18 @@ VSTRIDE_3SRC(unsigned vstride)
   unreachable("invalid vstride");
 }
 static inline unsigned
 brw_implied_width_for_3src_a1(unsigned v, unsigned h)
 {
   /* "Regioning Rules for Align1 Ternary Operations" */
   /* TODO: Add remaining rules and de-duplicate with brw_disasm.c */
   if (v == 0) return 1;
   if (h == 0) return v;
   return v/h;
 }
 static struct string
 brw_hw_decode_inst(const struct brw_isa_info *isa,
                   brw_hw_decoded_inst *inst,
@ -2629,6 +2761,7 @@ brw_hw_decode_inst(const struct brw_isa_info *isa,
            inst->src[0].subnr = brw_eu_inst_3src_a1_src0_subreg_nr(devinfo, raw);
            inst->src[0].vstride = VSTRIDE_3SRC(brw_eu_inst_3src_a1_src0_vstride(devinfo, raw));
            inst->src[0].hstride = STRIDE(brw_eu_inst_3src_a1_src0_hstride(devinfo, raw));
            inst->src[0].width = brw_implied_width_for_3src_a1(inst->src[0].vstride, inst->src[0].hstride);
         }
         inst->src[1].file = brw_eu_inst_3src_a1_src1_reg_file(devinfo, raw);
@ -2639,6 +2772,7 @@ brw_hw_decode_inst(const struct brw_isa_info *isa,
         inst->src[1].subnr = brw_eu_inst_3src_a1_src1_subreg_nr(devinfo, raw);
         inst->src[1].vstride = VSTRIDE_3SRC(brw_eu_inst_3src_a1_src1_vstride(devinfo, raw));
         inst->src[1].hstride = STRIDE(brw_eu_inst_3src_a1_src1_hstride(devinfo, raw));
         inst->src[1].width = brw_implied_width_for_3src_a1(inst->src[1].vstride, inst->src[1].hstride);
         inst->src[2].file = brw_eu_inst_3src_a1_src2_reg_file(devinfo, raw);
         inst->src[2].type = brw_eu_inst_3src_a1_src2_type(devinfo, raw);
@ -2648,6 +2782,7 @@ brw_hw_decode_inst(const struct brw_isa_info *isa,
            inst->src[2].nr = brw_eu_inst_3src_src2_reg_nr(devinfo, raw);
            inst->src[2].subnr = brw_eu_inst_3src_a1_src2_subreg_nr(devinfo, raw);
            inst->src[2].hstride = STRIDE(brw_eu_inst_3src_a1_src2_hstride(devinfo, raw));
            inst->src[2].width = brw_implied_width_for_3src_a1(inst->src[2].vstride, inst->src[2].hstride);
         }
      } else {
--- a/src/intel/compiler/brw_validate.cpp
+++ b/src/intel/compiler/brw_validate.cpp
@ -345,9 +345,9 @@ brw_validate(const brw_shader &s)
               brw_type_is_int(inst->src[1].type) +
               brw_type_is_int(inst->src[2].type);
            const unsigned float_sources =
-               brw_type_is_float(inst->src[0].type) +
+               brw_type_is_float_or_bfloat(inst->src[0].type) +
-               brw_type_is_float(inst->src[1].type) +
+               brw_type_is_float_or_bfloat(inst->src[1].type) +
-               brw_type_is_float(inst->src[2].type);
+               brw_type_is_float_or_bfloat(inst->src[2].type);
            fsv_assert((integer_sources == 3 && float_sources == 0) ||
                       (integer_sources == 0 && float_sources == 3));
--- a/src/intel/compiler/test_eu_validate.cpp
+++ b/src/intel/compiler/test_eu_validate.cpp
@ -105,15 +105,13 @@ INSTANTIATE_TEST_SUITE_P(
 );
 static bool
-validate(struct brw_codegen *p)
+validate(struct brw_codegen *p, char **error = nullptr)
 {
   const bool print = getenv("TEST_DEBUG");
   struct disasm_info *disasm = disasm_initialize(p->isa, NULL);
-   if (print) {
+   struct inst_group *group = disasm_new_inst_group(disasm, 0);
-      disasm_new_inst_group(disasm, 0);
+   disasm_new_inst_group(disasm, p->next_insn_offset);
      disasm_new_inst_group(disasm, p->next_insn_offset);
   }
   bool ret = brw_validate_instructions(p->isa, p->store, 0,
                                        p->next_insn_offset, disasm);
@ -121,7 +119,9 @@ validate(struct brw_codegen *p)
   if (print) {
      dump_assembly(p->store, 0, p->next_insn_offset, disasm, NULL);
   }
-   ralloc_free(disasm);
+
   if (error)
      *error = group->error;
   return ret;
 }
@ -470,13 +470,17 @@ TEST_P(validation_test, invalid_type_encoding_3src_a1)
         }
         struct brw_reg g = retype(g0, test_case[i].type);
-         if (!brw_type_is_int(test_case[i].type)) {
+         if (brw_type_is_bfloat(test_case[i].type)) {
            /* BF is more restrictive, so ensure the instruction is valid. */
            brw_MAD(p, retype(g, BRW_TYPE_F), g, g, retype(g, BRW_TYPE_F));
         } else if (!brw_type_is_int(test_case[i].type)) {
            brw_MAD(p, g, g, g, g);
         } else {
            brw_BFE(p, g, g, g, g);
         }
-         EXPECT_TRUE(validate(p));
+         char *error = NULL;
         EXPECT_TRUE(validate(p, &error)) << "Unexpected validation failure: " << error;
         clear_instructions(p);
      }
@ -2385,7 +2389,9 @@ TEST_P(validation_test, qword_low_power_no_indirect_addressing)
      if (intel_device_info_is_9lp(&devinfo)) {
         EXPECT_EQ(inst[i].expected_result, validate(p));
      } else {
-         EXPECT_TRUE(validate(p));
+         char *error = nullptr;
         EXPECT_TRUE(validate(p, &error))
            << "Test index = " << i << " failed to validate: " << error;
      }
      clear_instructions(p);
@ -3718,3 +3724,163 @@ TEST_P(validation_test, scalar_register_restrictions)
      clear_instructions(p);
   }
 }
 TEST_P(validation_test, bfloat_restrictions)
 {
   /* Restrictions from ACM PRM, vol. 9, section "Register Region
    * Restrictions", sub-section 7.
    */
   if (!devinfo.has_bfloat16)
      return;
   struct test {
      const char *error_pattern;
      enum opcode opcode;
      unsigned exec_size;
      brw_reg dst, src0, src1, src2;
   };
   const char *PASS = nullptr;
   const struct test tests[] = {
      { PASS,
        BRW_OPCODE_MOV, 8, brw_grf(BRW_TYPE_BF, 10, 0, 2,1,2),
                           brw_grf(BRW_TYPE_F,  20, 0, 1,1,0) },
      { "pure bfloat16 operands are not supported",
        BRW_OPCODE_MOV, 8, brw_grf(BRW_TYPE_BF, 10, 0, 2,1,2),
                           brw_grf(BRW_TYPE_BF, 20, 0, 1,1,0) },
      { "Execution size must not be greater than",
        BRW_OPCODE_MOV, 16 * reg_unit(&devinfo),
                        brw_grf(BRW_TYPE_BF, 10, 0, 2,1,2),
                        brw_grf(BRW_TYPE_F,  20, 0, 1,1,0) },
      { PASS,
        BRW_OPCODE_ADD, 8, brw_grf(BRW_TYPE_BF, 10, 0, 2,1,2),
                           brw_grf(BRW_TYPE_F,  20, 0, 1,1,0),
                           brw_grf(BRW_TYPE_F,  30, 0, 1,1,0) },
      { "pure bfloat16 operands are not supported",
        BRW_OPCODE_ADD, 8, brw_grf(BRW_TYPE_BF, 10, 0, 2,1,2),
                           brw_grf(BRW_TYPE_BF, 20, 0, 1,1,0),
                           brw_grf(BRW_TYPE_BF, 30, 0, 1,1,0) },
      { "Broadcast of bfloat16 scalar is not supported",
        BRW_OPCODE_ADD, 8, brw_grf(BRW_TYPE_BF, 10, 0, 2,1,2),
                           brw_grf(BRW_TYPE_F,  20, 0, 1,1,0),
                           brw_grf(BRW_TYPE_BF, 30, 0, 0,1,0) },
      { PASS,
        BRW_OPCODE_MUL, 8, brw_grf(BRW_TYPE_BF, 10, 0, 1,1,0),
                           brw_grf(BRW_TYPE_BF, 20, 0, 1,1,0),
                           brw_grf(BRW_TYPE_F,  30, 0, 1,1,0) },
      { "Bfloat16 not allowed in Src1 of 2-source instructions involving multiplier",
        BRW_OPCODE_MUL, 8, brw_grf(BRW_TYPE_BF, 10, 0, 1,1,0),
                           brw_grf(BRW_TYPE_F,  20, 0, 1,1,0),
                           brw_grf(BRW_TYPE_BF, 30, 0, 1,1,0) },
      { PASS,
        BRW_OPCODE_MAD, 8, brw_grf(BRW_TYPE_BF, 10, 0, 2,1,2),
                           brw_grf(BRW_TYPE_BF, 20, 0, 1,1,0),
                           brw_grf(BRW_TYPE_BF, 30, 0, 1,1,0),
                           brw_grf(BRW_TYPE_F,  40, 0, 1,1,0) },
      { "Bfloat16 not allowed in Src2 of 3-source instructions involving multiplier",
        BRW_OPCODE_MAD, 8, brw_grf(BRW_TYPE_BF, 10, 0, 2,1,2),
                           brw_grf(BRW_TYPE_BF, 20, 0, 1,1,0),
                           brw_grf(BRW_TYPE_F,  30, 0, 1,1,0),
                           brw_grf(BRW_TYPE_BF, 40, 0, 1,1,0) },
      { PASS,
        BRW_OPCODE_ADD, 8, brw_grf(BRW_TYPE_BF, 10, 1, 2,1,2),
                           brw_grf(BRW_TYPE_F,  20, 0, 1,1,0),
                           brw_grf(BRW_TYPE_F,  30, 0, 1,1,0) },
      { "Unpacked bfloat16 destination must have stride 2 and register offset 0 or 1",
        BRW_OPCODE_ADD, 8, brw_grf(BRW_TYPE_BF, 10, 3, 2,1,2),
                           brw_grf(BRW_TYPE_F,  20, 0, 1,1,0),
                           brw_grf(BRW_TYPE_F,  30, 0, 1,1,0) },
      { PASS,
        BRW_OPCODE_ADD, 8, brw_grf(BRW_TYPE_BF, 10, 8 * reg_unit(&devinfo), 1,1,0),
                           brw_grf(BRW_TYPE_BF, 20, 0, 1,1,0),
                           brw_grf(BRW_TYPE_F,  30, 0, 1,1,0) },
      { "Packed bfloat16 destination must have register offset 0 or half of GRF register",
        BRW_OPCODE_ADD, 8, brw_grf(BRW_TYPE_BF, 10, 1, 1,1,0),
                           brw_grf(BRW_TYPE_BF, 20, 0, 1,1,0),
                           brw_grf(BRW_TYPE_F,  30, 0, 1,1,0) },
      { "Bfloat16 source must be packed",
        BRW_OPCODE_ADD, 8, brw_grf(BRW_TYPE_BF, 10, 0, 1,1,0),
                           brw_grf(BRW_TYPE_BF, 20, 0, 2,1,2),
                           brw_grf(BRW_TYPE_F,  30, 0, 1,1,0) },
      { PASS,
        BRW_OPCODE_ADD, 8, brw_grf(BRW_TYPE_BF, 10, 0, 1,1,0),
                           brw_grf(BRW_TYPE_BF, 20, 8 * reg_unit(&devinfo), 1,1,0),
                           brw_grf(BRW_TYPE_F,  30, 0, 1,1,0) },
      { "Bfloat16 source must have register offset 0 or half of GRF register",
        BRW_OPCODE_ADD, 8, brw_grf(BRW_TYPE_BF, 10, 0, 1,1,0),
                           brw_grf(BRW_TYPE_BF, 20, 5, 1,1,0),
                           brw_grf(BRW_TYPE_F,  30, 0, 1,1,0) },
   };
   for (unsigned i = 0; i < ARRAY_SIZE(tests); i++) {
      const struct test &t = tests[i];
      switch (tests[i].opcode) {
      case BRW_OPCODE_MOV:
         brw_MOV(p, t.dst, t.src0);
         break;
      case BRW_OPCODE_ADD:
         brw_ADD(p, t.dst, t.src0, t.src1);
         break;
      case BRW_OPCODE_MUL:
         brw_MUL(p, t.dst, t.src0, t.src1);
         break;
      case BRW_OPCODE_MAD:
         brw_MAD(p, t.dst, t.src0, t.src1, t.src2);
         break;
      default:
         unreachable("unexpected opcode in tests");
      }
      if (tests[i].opcode == BRW_OPCODE_MAD) {
         brw_eu_inst_set_3src_exec_size(&devinfo, last_inst, cvt(t.exec_size) - 1);
      } else {
         brw_eu_inst_set_exec_size(&devinfo, last_inst, cvt(t.exec_size) - 1);
      }
      /* TODO: Expand this test logic to check validation error to other
       * tests.
       */
      char *error = nullptr;
      bool valid = validate(p, &error);
      if (t.error_pattern) {
         EXPECT_FALSE(valid)
            << "Test vector index = " << i << " expected to "
            << "fail validation with error containing: '" << t.error_pattern << "' "
            << "but succeeded instead.";
         if (error) {
            EXPECT_TRUE(strstr(error, t.error_pattern))
               << "Test vector index = " << i << " expected to "
               << "fail validation with error containing: '" << t.error_pattern << "' "
               << "but error was: '" << error << "'.";
         }
      } else {
         EXPECT_TRUE(valid)
            << "Test vector index = " << i << " expected to succeed "
            << "but failed validation with error: '" << error << "'.";
      }
      clear_instructions(p);
   }
 }