intel: Add support for BFloat16 as cooperative matrix source

Re-organize the configuration lists to make it easier to include BFloat16
only for the Gfx125+ platforms that support it, while keeping MTL on the
"lowered" configurations from pre-Gfx125.

Reviewed-by: Rohan Garg <rohan.garg@intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34105>
This commit is contained in:
Caio Oliveira 2025-03-14 13:29:59 -07:00 committed by Marge Bot
parent 2bbe042e87
commit 07fa3b3785
4 changed files with 40 additions and 19 deletions

View file

@ -187,6 +187,7 @@ init_slice_info(struct lower_cmat_state *state,
break;
case GLSL_TYPE_UINT:
case GLSL_TYPE_FLOAT16:
case GLSL_TYPE_BFLOAT16:
case GLSL_TYPE_UINT8:
case GLSL_TYPE_UINT16:
base_type = GLSL_TYPE_UINT;

View file

@ -521,17 +521,36 @@ static const struct intel_device_info intel_device_info_chv = {
.simulator_id = 13,
};
/* Designated initializer for .cooperative_matrix_configurations listing the
 * subgroup-scope configurations without any BFloat16 entry: one FLOAT16
 * source entry (8, 8, 16) and SINT8/UINT8 source entries (8, 8, 32).
 *
 * Referenced from GFX9_FEATURES, and again from MTL_CONFIG after
 * XEHP_FEATURES so that the later initializer replaces the XeHP list there.
 *
 * NOTE(review): the three integers are presumably the M, N, K sizes and the
 * four types the A, B, C and result component types -- confirm against the
 * struct declaration, which is not visible here.
 *
 * The expansion has no trailing comma; the user of the macro supplies it.
 */
#define CMAT_PRE_XEHP_CONFIGURATIONS \
.cooperative_matrix_configurations = { \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \
}
/* Designated initializer for .cooperative_matrix_configurations with the
 * same sizes as CMAT_PRE_XEHP_CONFIGURATIONS plus a BFLOAT16 source entry
 * (8, 8, 16) that uses FLOAT32 for the last two component types.
 *
 * Referenced from XEHP_FEATURES.
 *
 * NOTE(review): the three integers are presumably the M, N, K sizes and the
 * four types the A, B, C and result component types -- confirm against the
 * struct declaration, which is not visible here.
 *
 * The expansion has no trailing comma; the user of the macro supplies it.
 */
#define CMAT_XEHP_CONFIGURATIONS \
.cooperative_matrix_configurations = { \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \
}
/* Designated initializer for .cooperative_matrix_configurations whose
 * entries use 16 as the second size: FLOAT16 and BFLOAT16 source entries
 * (8, 16, 16) and SINT8/UINT8 source entries (8, 16, 32).
 *
 * Referenced from XE2_FEATURES after XEHP_FEATURES, so this later
 * initializer replaces the list that XEHP_FEATURES provides.
 *
 * NOTE(review): the three integers are presumably the M, N, K sizes and the
 * four types the A, B, C and result component types -- confirm against the
 * struct declaration, which is not visible here.
 *
 * The expansion has no trailing comma; the user of the macro supplies it.
 */
#define CMAT_XE2_CONFIGURATIONS \
.cooperative_matrix_configurations = { \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \
}
#define GFX9_FEATURES \
GFX8_FEATURES, \
CMAT_PRE_XEHP_CONFIGURATIONS, \
.ver = 9, \
.has_sample_with_hiz = true, \
.has_illegal_ccs_values = true, \
.timestamp_frequency = 12000000, \
.cooperative_matrix_configurations = { \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \
}
.timestamp_frequency = 12000000
#define GFX9_MAX_THREADS \
.max_vs_threads = 336, \
@ -1034,6 +1053,7 @@ static const struct intel_device_info intel_device_info_sg1 = {
#define XEHP_FEATURES \
GFX12_FEATURES, \
CMAT_XEHP_CONFIGURATIONS, \
.verx10 = 125, \
.has_lsc = true, \
.has_llc = false, \
@ -1102,6 +1122,7 @@ static const struct intel_device_info intel_device_info_atsm_g11 = {
#define MTL_CONFIG(platform_suffix) \
XEHP_FEATURES, XEHP_PLACEHOLDER_THREADS_AND_URB, \
CMAT_PRE_XEHP_CONFIGURATIONS, \
.platform = INTEL_PLATFORM_ ## platform_suffix, \
.has_64bit_float = true, \
.has_64bit_float_via_math_pipe = true, \
@ -1137,10 +1158,12 @@ static const struct intel_device_info intel_device_info_arl_h = {
.has_bfloat16 = true,
/* BSpec 55414 (r53716). */
.has_systolic = true,
CMAT_XEHP_CONFIGURATIONS,
};
#define XE2_FEATURES \
XEHP_FEATURES, \
CMAT_XE2_CONFIGURATIONS, \
.ver = 20, \
.verx10 = 200, \
.grf_size = 64, \
@ -1149,12 +1172,7 @@ static const struct intel_device_info intel_device_info_arl_h = {
.has_64bit_int = true, \
.has_indirect_unroll = true, \
.has_aux_map = false, \
.has_flat_ccs = true, \
.cooperative_matrix_configurations = { \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \
}
.has_flat_ccs = true
/* Note, do not enable PAT 10 or 12 on BMG, according to
* Wa_18038669374 we should not use any MOCS/PAT settings

View file

@ -169,7 +169,8 @@ Enum("intel_cooperative_matrix_component_type",
"INTEL_CMAT_SINT32",
"INTEL_CMAT_SINT8",
"INTEL_CMAT_UINT32",
"INTEL_CMAT_UINT8"])
"INTEL_CMAT_UINT8",
"INTEL_CMAT_BFLOAT16"])
Enum("intel_engine_class",
["INTEL_ENGINE_CLASS_RENDER",

View file

@ -3119,12 +3119,13 @@ static VkComponentTypeKHR
convert_component_type(enum intel_cooperative_matrix_component_type t)
{
switch (t) {
case INTEL_CMAT_FLOAT16: return VK_COMPONENT_TYPE_FLOAT16_KHR;
case INTEL_CMAT_FLOAT32: return VK_COMPONENT_TYPE_FLOAT32_KHR;
case INTEL_CMAT_SINT32: return VK_COMPONENT_TYPE_SINT32_KHR;
case INTEL_CMAT_SINT8: return VK_COMPONENT_TYPE_SINT8_KHR;
case INTEL_CMAT_UINT32: return VK_COMPONENT_TYPE_UINT32_KHR;
case INTEL_CMAT_UINT8: return VK_COMPONENT_TYPE_UINT8_KHR;
case INTEL_CMAT_FLOAT16: return VK_COMPONENT_TYPE_FLOAT16_KHR;
case INTEL_CMAT_FLOAT32: return VK_COMPONENT_TYPE_FLOAT32_KHR;
case INTEL_CMAT_SINT32: return VK_COMPONENT_TYPE_SINT32_KHR;
case INTEL_CMAT_SINT8: return VK_COMPONENT_TYPE_SINT8_KHR;
case INTEL_CMAT_UINT32: return VK_COMPONENT_TYPE_UINT32_KHR;
case INTEL_CMAT_UINT8: return VK_COMPONENT_TYPE_UINT8_KHR;
case INTEL_CMAT_BFLOAT16: return VK_COMPONENT_TYPE_BFLOAT16_KHR;
}
unreachable("invalid cooperative matrix component type in configuration");
}