brw: Add BRW_TYPE_BF for bfloat16

Reviewed-by: Rohan Garg <rohan.garg@intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33664>
2025-12-21 20:10:14 +01:00 · 2024-09-07 09:41:01 -07:00 · 2024-09-07 09:41:01 -07:00 · 9916cc1050
commit 9916cc1050
parent d1f4fb8eee
10 changed files with 47 additions and 16 deletions
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -611,7 +611,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
      brw_eu_inst_set_3src_a1_dst_hstride(devinfo, inst,
                                          to_3src_align1_dst_hstride(dest.hstride));

-      if (brw_type_is_float(dest.type)) {
+      if (brw_type_is_float_or_bfloat(dest.type)) {
         brw_eu_inst_set_3src_a1_exec_type(devinfo, inst,
                                        BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
      } else {
@@ -771,7 +771,7 @@ brw_dpas_three_src(struct brw_codegen *p, enum opcode opcode,
   brw_eu_inst_set_dpas_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
   brw_eu_inst_set_dpas_3src_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));

-   if (brw_type_is_float(dest.type)) {
+   if (brw_type_is_float_or_bfloat(dest.type)) {
      brw_eu_inst_set_dpas_3src_exec_type(devinfo, inst,
                                       BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
   } else {
--- a/src/intel/compiler/brw_eu_inst.h
+++ b/src/intel/compiler/brw_eu_inst.h
@@ -535,7 +535,7 @@ brw_eu_inst_set_3src_a1_##reg##_type(const struct intel_device_info *devinfo, \
   UNUSED enum brw_align1_3src_exec_type exec_type =                          \
      (enum brw_align1_3src_exec_type)                                        \
          brw_eu_inst_3src_a1_exec_type(devinfo, inst);                       \
-   if (brw_type_is_float(type)) {                                             \
+   if (brw_type_is_float_or_bfloat(type)) {                                   \
      assert(exec_type == BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);                   \
   } else {                                                                   \
      assert(exec_type == BRW_ALIGN1_3SRC_EXEC_TYPE_INT);                     \
@@ -646,7 +646,7 @@ brw_eu_inst_set_dpas_3src_##reg##_type(const struct intel_device_info *devinfo,
   UNUSED enum brw_align1_3src_exec_type exec_type =                          \
      (enum brw_align1_3src_exec_type)                                        \
         brw_eu_inst_dpas_3src_exec_type(devinfo, inst);                      \
-   if (brw_type_is_float(type)) {                                             \
+   if (brw_type_is_float_or_bfloat(type)) {                                   \
      assert(exec_type == BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);                   \
   } else {                                                                   \
      assert(exec_type == BRW_ALIGN1_3SRC_EXEC_TYPE_INT);                     \
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -407,6 +407,7 @@ execution_type_for_type(enum brw_reg_type type)
      return type;

   case BRW_TYPE_VF:
+   case BRW_TYPE_BF:
      return BRW_TYPE_F;

   case BRW_TYPE_Q:
--- a/src/intel/compiler/brw_inst.cpp
+++ b/src/intel/compiler/brw_inst.cpp
@@ -1031,7 +1031,7 @@ get_exec_type(const brw_inst *inst)
         if (brw_type_size_bytes(t) > brw_type_size_bytes(exec_type))
            exec_type = t;
         else if (brw_type_size_bytes(t) == brw_type_size_bytes(exec_type) &&
-                  brw_type_is_float(t))
+                  brw_type_is_float_or_bfloat(t))
            exec_type = t;
      }
   }
--- a/src/intel/compiler/brw_reg.h
+++ b/src/intel/compiler/brw_reg.h
@@ -363,6 +363,7 @@ get_exec_type(const enum brw_reg_type type)
   case BRW_TYPE_UV:
      return BRW_TYPE_UW;
   case BRW_TYPE_VF:
+   case BRW_TYPE_BF:
      return BRW_TYPE_F;
   default:
      return type;
--- a/src/intel/compiler/brw_reg_type.c
+++ b/src/intel/compiler/brw_reg_type.c
@@ -46,6 +46,9 @@ brw_type_encode(const struct intel_device_info *devinfo,
                               : devinfo->has_64bit_float))
      return INVALID_HW_REG_TYPE;

+   if (brw_type_is_bfloat(type) && !devinfo->has_bfloat16)
+      return INVALID_HW_REG_TYPE;
+
   if (devinfo->ver >= 12) {
      if (brw_type_is_vector_imm(type))
         return type & ~(BRW_TYPE_VECTOR | BRW_TYPE_SIZE_MASK);
@@ -121,8 +124,9 @@ brw_type_decode(const struct intel_device_info *devinfo,
         else if (file == IMM)
            return (t & BRW_TYPE_BASE_SINT) ? BRW_TYPE_V : BRW_TYPE_UV;
      }
-      /* signed-integer floats -> no */
-      if ((t & BRW_TYPE_BASE_MASK) == BRW_TYPE_BASE_MASK)
+      if (brw_type_is_bfloat(t) && !devinfo->has_bfloat16)
+         return BRW_TYPE_INVALID;
+      if (brw_type_is_float_or_bfloat(t) && brw_type_size_bits(t) < 16)
         return BRW_TYPE_INVALID;
      return t;
   } else if (devinfo->ver >= 11) {
@@ -191,6 +195,9 @@ unsigned
 brw_type_encode_for_3src(const struct intel_device_info *devinfo,
                         enum brw_reg_type type)
 {
+   if (brw_type_is_bfloat(type) && !devinfo->has_bfloat16)
+      return INVALID_HW_REG_TYPE;
+
   if (devinfo->ver >= 12) {
      /* size mask and SINT type bit match exactly */
      return type & 0b111;
@@ -238,7 +245,7 @@ brw_type_decode_for_3src(const struct intel_device_info *devinfo,
      unsigned base_field = hw_type & BRW_TYPE_BASE_MASK;
      if (exec_type == BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT) {
         base_field |= BRW_TYPE_BASE_FLOAT;
-         if (base_field & BRW_TYPE_BASE_SINT)
+         if (base_field == BRW_TYPE_BASE_BFLOAT && !devinfo->has_bfloat16)
            return BRW_TYPE_INVALID;
      }
      return (enum brw_reg_type) (base_field | size_field);
@@ -288,6 +295,8 @@ brw_reg_type_to_letters(enum brw_reg_type type)
      [BRW_TYPE_F]  = "F",
      [BRW_TYPE_DF] = "DF",

+      [BRW_TYPE_BF] = "BF",
+
      [BRW_TYPE_UV] = "UV",
      [BRW_TYPE_V]  = "V",
      [BRW_TYPE_VF] = "VF",
--- a/src/intel/compiler/brw_reg_type.h
+++ b/src/intel/compiler/brw_reg_type.h
@@ -42,8 +42,8 @@ struct intel_device_info;
 * Enum for register/value types throughout the compiler.
 *
 * Bits 1:0 is the size of the type as a U2 'n' where size = 8 * 2^n.
- * Bit 3 is set for signed integer types (B/W/D/Q/V/UV).
- * Bit 4 is set for floating point types.  Unsigned types have neither set.
+ * Bits 3:4 are set to identify the base type: unsigned integer, signed integer,
+ * regular floating point, or bfloat.
 * Bit 5 is set for vector immediates.
 *
 * While this is inspired by the Tigerlake encodings (and nir_alu_type),
@@ -74,6 +74,10 @@ enum PACKED brw_reg_type {
   BRW_TYPE_DF = 0b01011,
   /** @} */

+   /** Floating point types (bfloat variants): 16-bit @{ */
+   BRW_TYPE_BF  = 0b01101,
+   /** @} */
+
   /** Vector immediate types */
   BRW_TYPE_UV = 0b10001,
   BRW_TYPE_V  = 0b10101,
@@ -85,6 +89,7 @@ enum PACKED brw_reg_type {
   BRW_TYPE_BASE_UINT   = 0b00000, /* unsigned types have no base bits set */
   BRW_TYPE_BASE_SINT   = 0b00100, /* type has a signed integer base type */
   BRW_TYPE_BASE_FLOAT  = 0b01000, /* type has a floating point base type */
+   BRW_TYPE_BASE_BFLOAT = 0b01100, /* type has a floating point (bfloat variant) base type */
   BRW_TYPE_VECTOR      = 0b10000, /* type is a vector immediate */

   BRW_TYPE_INVALID    = 0b11111,
@@ -97,6 +102,18 @@ brw_type_is_float(enum brw_reg_type t)
   return (t & BRW_TYPE_BASE_MASK) == BRW_TYPE_BASE_FLOAT;
 }

+static inline bool
+brw_type_is_bfloat(enum brw_reg_type t)
+{
+   return (t & BRW_TYPE_BASE_MASK) == BRW_TYPE_BASE_BFLOAT;
+}
+
+static inline bool
+brw_type_is_float_or_bfloat(enum brw_reg_type t)
+{
+   return brw_type_is_float(t) || brw_type_is_bfloat(t);
+}
+
 static inline bool
 brw_type_is_uint(enum brw_reg_type t)
 {
--- a/src/intel/compiler/test_eu_validate.cpp
+++ b/src/intel/compiler/test_eu_validate.cpp
@@ -443,6 +443,7 @@ TEST_P(validation_test, invalid_type_encoding_3src_a1)
      { BRW_TYPE_UD, E(INT),   true  },
      { BRW_TYPE_W,  E(INT),   true  },
      { BRW_TYPE_UW, E(INT),   true  },
+      { BRW_TYPE_BF, E(FLOAT), devinfo.has_bfloat16 },

      /* There are no ternary instructions that can operate on B-type sources
       * on Gfx11-12. Src1/Src2 cannot be B-typed either.
--- a/src/intel/dev/intel_device_info.c
+++ b/src/intel/dev/intel_device_info.c
@@ -1045,6 +1045,7 @@ static const struct intel_device_info intel_device_info_sg1 = {
   .has_llc = false,                                            \
   .has_ray_tracing = true,                                     \
   .has_mesh_shading = true,                                    \
+   .has_bfloat16 = true,                                        \
   .has_coarse_pixel_primitive_and_cb = true,                   \
   .needs_null_push_constant_tbimr_workaround = true,           \
   .simulator_id = 29
--- a/src/intel/dev/intel_device_info.py
+++ b/src/intel/dev/intel_device_info.py
@@ -277,6 +277,7 @@ Struct("intel_device_info",
        Member("bool", "has_64bit_float", compiler_field=True),
        Member("bool", "has_64bit_float_via_math_pipe", compiler_field=True),
        Member("bool", "has_64bit_int", compiler_field=True),
+        Member("bool", "has_bfloat16", compiler_field=True),
        Member("bool", "has_integer_dword_mul", compiler_field=True),
        Member("bool", "supports_simd16_3src", compiler_field=True),
        Member("bool", "disable_ccs_repack"),