intel: Move cmat configurations to anv_physical_device

Some cooperative matrix properties are defined by the driver itself and
are not properties of the HW.  In particular, whether the scope is
subgroup or workgroup is not directly related to the HW.

It could make sense to encode the DPAS combinations into intel_device_info,
but we are not using all possible combinations yet, so that wouldn't be very
useful in practice.

The new scheme is based on radv and will set us up for also filling
the flexible dimensions properties.

Note: this also fixes a subtle issue where ARL was incorrectly inheriting
the PRE_XEHP configurations, which include FLOAT16/FLOAT16/FLOAT16/FLOAT16,
a combination it does not support.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41564>
This commit is contained in:
Caio Oliveira 2026-04-09 13:38:39 -07:00 committed by Marge Bot
parent 088eeb2d81
commit d3cfe04b3d
3 changed files with 58 additions and 117 deletions

View file

@ -530,35 +530,8 @@ static const struct intel_device_info intel_device_info_chv = {
.simulator_id = 13,
};
/* Designated-initializer fragment for the cooperative_matrix_configurations
 * array of struct intel_device_info.
 *
 * Each entry is { scope, m, n, k, a, b, c, result }, where matrix A is MxK,
 * matrix B is KxN and matrices C and Result are MxN.
 *
 * This table lists four subgroup-scope 8x8 configurations:
 *   - FLOAT16 inputs with FLOAT16 or FLOAT32 C/Result (K = 16),
 *   - SINT8 / UINT8 inputs with SINT32 / UINT32 C/Result (K = 32).
 */
#define CMAT_PRE_XEHP_CONFIGURATIONS \
.cooperative_matrix_configurations = { \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \
}
/* Designated-initializer fragment for the cooperative_matrix_configurations
 * array of struct intel_device_info.
 *
 * Each entry is { scope, m, n, k, a, b, c, result }, where matrix A is MxK,
 * matrix B is KxN and matrices C and Result are MxN.
 *
 * This table lists four subgroup-scope 8x8 configurations:
 *   - FLOAT16 and BFLOAT16 inputs, both with FLOAT32 C/Result (K = 16),
 *   - SINT8 / UINT8 inputs with SINT32 / UINT32 C/Result (K = 32).
 * Unlike the PRE_XEHP table, there is no entry with FLOAT16 C/Result.
 */
#define CMAT_XEHP_CONFIGURATIONS \
.cooperative_matrix_configurations = { \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \
}
/* Designated-initializer fragment for the cooperative_matrix_configurations
 * array of struct intel_device_info.
 *
 * Each entry is { scope, m, n, k, a, b, c, result }, where matrix A is MxK,
 * matrix B is KxN and matrices C and Result are MxN.
 *
 * This table lists six subgroup-scope configurations with M = 8 and N = 16:
 *   - FLOAT16 inputs with FLOAT16 or FLOAT32 C/Result (K = 16),
 *   - BFLOAT16 inputs with BFLOAT16 or FLOAT32 C/Result (K = 16),
 *   - SINT8 / UINT8 inputs with SINT32 / UINT32 C/Result (K = 32).
 */
#define CMAT_XE2_CONFIGURATIONS \
.cooperative_matrix_configurations = { \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_BFLOAT16 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \
{ INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \
}
#define GFX9_FEATURES \
GFX8_FEATURES, \
CMAT_PRE_XEHP_CONFIGURATIONS, \
.ver = 9, \
.has_sample_with_hiz = true, \
.timestamp_frequency = 12000000
@ -1062,7 +1035,6 @@ static const struct intel_device_info intel_device_info_sg1 = {
#define XEHP_FEATURES \
GFX12_FEATURES, \
CMAT_XEHP_CONFIGURATIONS, \
.verx10 = 125, \
.has_lsc = true, \
.has_llc = false, \
@ -1131,7 +1103,6 @@ static const struct intel_device_info intel_device_info_atsm_g11 = {
#define MTL_CONFIG(platform_suffix) \
XEHP_FEATURES, XEHP_PLACEHOLDER_THREADS_AND_URB, \
CMAT_PRE_XEHP_CONFIGURATIONS, \
.platform = INTEL_PLATFORM_ ## platform_suffix, \
.has_64bit_float = true, \
.has_64bit_float_via_math_pipe = true, \
@ -1167,12 +1138,10 @@ static const struct intel_device_info intel_device_info_arl_h = {
.has_bfloat16 = true,
/* BSpec 55414 (r53716). */
.has_systolic = true,
CMAT_XEHP_CONFIGURATIONS,
};
#define XE2_FEATURES \
XEHP_FEATURES, \
CMAT_XE2_CONFIGURATIONS, \
.ver = 20, \
.verx10 = 200, \
.grf_size = 64, \

View file

@ -159,19 +159,6 @@ Struct("intel_device_info_pat_entry",
This tells KMD what caching mode the CPU mapping should use.
It has nothing to do with any PAT cache modes."""))])
# Scope of a cooperative matrix configuration.
# INTEL_CMAT_SCOPE_NONE is pinned to 0; code that walks the
# cooperative_matrix_configurations array treats an entry whose scope is NONE
# as the end of the list.
Enum("intel_cmat_scope",
[EnumValue("INTEL_CMAT_SCOPE_NONE", value=0),
"INTEL_CMAT_SCOPE_SUBGROUP"])
# Element types for the A, B, C and Result matrices of a cooperative matrix
# configuration (see the a/b/c/result members of
# intel_cooperative_matrix_configuration).
Enum("intel_cooperative_matrix_component_type",
["INTEL_CMAT_FLOAT16",
"INTEL_CMAT_FLOAT32",
"INTEL_CMAT_SINT32",
"INTEL_CMAT_SINT8",
"INTEL_CMAT_UINT32",
"INTEL_CMAT_UINT8",
"INTEL_CMAT_BFLOAT16"])
Enum("intel_engine_class",
["INTEL_ENGINE_CLASS_RENDER",
"INTEL_ENGINE_CLASS_COPY",
@ -180,22 +167,6 @@ Enum("intel_engine_class",
"INTEL_ENGINE_CLASS_COMPUTE",
"INTEL_ENGINE_CLASS_INVALID"])
# One cooperative matrix configuration: a scope, the M/N/K dimensions and the
# component types of the A, B, C and Result matrices. The roles of the
# dimensions are spelled out in the comment attached to the first member.
# Member order matters: the device-info tables initialize entries
# positionally as { scope, m, n, k, a, b, c, result }.
Struct("intel_cooperative_matrix_configuration",
[Member("intel_cmat_scope", "scope",
comment=dedent("""\
Matrix A is MxK.
Matrix B is KxN.
Matrix C and Matrix Result are MxN.
Result = A * B + C;""")),
Member("uint8_t", "m"),
Member("uint8_t", "n"),
Member("uint8_t", "k"),
Member("intel_cooperative_matrix_component_type", "a"),
Member("intel_cooperative_matrix_component_type", "b"),
Member("intel_cooperative_matrix_component_type", "c"),
Member("intel_cooperative_matrix_component_type", "result")])
Enum("intel_kmd_type",
["INTEL_KMD_TYPE_INVALID",
"INTEL_KMD_TYPE_I915",
@ -485,8 +456,6 @@ Struct("intel_device_info",
Member("bool", "probe_forced", comment="Device needed INTEL_FORCE_PROBE"),
Member("intel_device_info_mem_desc", "mem"),
Member("intel_device_info_pat_desc", "pat"),
Member("intel_cooperative_matrix_configuration",
"cooperative_matrix_configurations", array=16),
Member("bool", "is_virtio")]
)

View file

@ -2885,8 +2885,8 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
goto fail_base;
device->has_cooperative_matrix =
(device->info.has_systolic || debug_get_bool_option("INTEL_LOWER_DPAS", false)) &&
device->info.cooperative_matrix_configurations[0].scope != INTEL_CMAT_SCOPE_NONE &&
(device->info.has_systolic ||
debug_get_bool_option("INTEL_LOWER_DPAS", false)) &&
!intel_use_jay_any_stage(&device->info);
/* Because of Xe2 PAT selected compression and the Vulkan spec requirement
@ -3418,28 +3418,66 @@ VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
return vk_outarray_status(&out);
}
static VkComponentTypeKHR
convert_component_type(enum intel_cooperative_matrix_component_type t)
static void
anv_fill_all_cooperative_matrix_props(const struct anv_physical_device *pdevice, struct __vk_outarray *base,
void (*fill_cb)(struct __vk_outarray *base, unsigned exec_size,
VkComponentTypeKHR a_type, VkComponentTypeKHR b_type,
VkComponentTypeKHR c_type, VkComponentTypeKHR r_type,
unsigned ops_per_chan, bool saturate))
{
switch (t) {
case INTEL_CMAT_FLOAT16: return VK_COMPONENT_TYPE_FLOAT16_KHR;
case INTEL_CMAT_FLOAT32: return VK_COMPONENT_TYPE_FLOAT32_KHR;
case INTEL_CMAT_SINT32: return VK_COMPONENT_TYPE_SINT32_KHR;
case INTEL_CMAT_SINT8: return VK_COMPONENT_TYPE_SINT8_KHR;
case INTEL_CMAT_UINT32: return VK_COMPONENT_TYPE_UINT32_KHR;
case INTEL_CMAT_UINT8: return VK_COMPONENT_TYPE_UINT8_KHR;
case INTEL_CMAT_BFLOAT16: return VK_COMPONENT_TYPE_BFLOAT16_KHR;
const struct intel_device_info *devinfo = &pdevice->info;
if (!pdevice->has_cooperative_matrix)
return;
const bool emulated = debug_get_bool_option("INTEL_LOWER_DPAS", false);
const unsigned exec_size = devinfo->ver >= 20 ? 16 : 8;
#define FILL(a_type, b_type, c_type, r_type, ops_per_chan, sat) \
fill_cb(base, exec_size, \
VK_COMPONENT_TYPE_##a_type##_KHR, \
VK_COMPONENT_TYPE_##b_type##_KHR, \
VK_COMPONENT_TYPE_##c_type##_KHR, \
VK_COMPONENT_TYPE_##r_type##_KHR, \
ops_per_chan, sat)
/* Note: XeHP doesn't have this configuration. */
if (devinfo->ver >= 20 || emulated)
FILL(FLOAT16, FLOAT16, FLOAT16, FLOAT16, 2, false);
FILL(FLOAT16, FLOAT16, FLOAT32, FLOAT32, 2, false);
if (devinfo->has_bfloat16 && !emulated) {
if (devinfo->ver >= 20)
FILL(BFLOAT16, BFLOAT16, BFLOAT16, BFLOAT16, 2, false);
FILL(BFLOAT16, BFLOAT16, FLOAT32, FLOAT32, 2, false);
}
UNREACHABLE("invalid cooperative matrix component type in configuration");
FILL(SINT8, SINT8, SINT32, SINT32, 4, false);
FILL(UINT8, UINT8, UINT32, UINT32, 4, false);
#undef FILL
}
static VkScopeKHR
convert_scope(enum intel_cmat_scope scope)
static void
anv_fill_cooperative_matrix_prop(struct __vk_outarray *base, unsigned exec_size,
VkComponentTypeKHR a_type, VkComponentTypeKHR b_type,
VkComponentTypeKHR c_type, VkComponentTypeKHR r_type,
unsigned ops_per_chan, bool saturate)
{
switch (scope) {
case INTEL_CMAT_SCOPE_SUBGROUP: return VK_SCOPE_SUBGROUP_KHR;
default:
UNREACHABLE("invalid cooperative matrix scope in configuration");
vk_outarray(VkCooperativeMatrixPropertiesKHR) *out = (void *)base;
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, out, p)
{
*p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
.MSize = 8,
.NSize = exec_size,
.KSize = 8 * ops_per_chan,
.AType = a_type,
.BType = b_type,
.CType = c_type,
.ResultType = r_type,
.saturatingAccumulation = saturate,
.scope = VK_SCOPE_SUBGROUP_KHR};
}
}
@ -3449,43 +3487,8 @@ VkResult anv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(
VkCooperativeMatrixPropertiesKHR* pProperties)
{
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
const struct intel_device_info *devinfo = &pdevice->info;
VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount);
if (!pdevice->has_cooperative_matrix)
return vk_outarray_status(&out);
const bool emulated = debug_get_bool_option("INTEL_LOWER_DPAS", false);
for (int i = 0; i < ARRAY_SIZE(devinfo->cooperative_matrix_configurations); i++) {
const struct intel_cooperative_matrix_configuration *cfg =
&devinfo->cooperative_matrix_configurations[i];
if (cfg->scope == INTEL_CMAT_SCOPE_NONE)
break;
/* BFloat16 not supported by brw_lower_dpas emulation. */
if (emulated && cfg->a == INTEL_CMAT_BFLOAT16)
continue;
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) {
prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;
prop->MSize = cfg->m;
prop->NSize = cfg->n;
prop->KSize = cfg->k;
prop->AType = convert_component_type(cfg->a);
prop->BType = convert_component_type(cfg->b);
prop->CType = convert_component_type(cfg->c);
prop->ResultType = convert_component_type(cfg->result);
prop->saturatingAccumulation = VK_FALSE;
prop->scope = convert_scope(cfg->scope);
}
}
anv_fill_all_cooperative_matrix_props(pdevice, &out.base, anv_fill_cooperative_matrix_prop);
return vk_outarray_status(&out);
}