From d3cfe04b3d0816279e95be63b50d20d83fc194de Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Thu, 9 Apr 2026 13:38:39 -0700 Subject: [PATCH] intel: Move cmat configurations to anv_physical_device Some cooperative matrix properties are defined by the driver itself and are not a property of the HW. In particular, whether the scope is subgroup or workgroup is not directly related to the HW. It could make sense to encode the DPAS combinations into intel_device_info, but we are not using all possible combinations yet and it wouldn't be very useful in practice. The new scheme was based on radv and will also set us up for filling the flexible dimensions properties. Note: this also fixes a subtle issue where ARL was incorrectly inheriting the PRE_XEHP configurations, which included FLOAT16/FLOAT16/FLOAT16/FLOAT16, a configuration it does not support. Reviewed-by: Ian Romanick Part-of: --- src/intel/dev/intel_device_info.c | 31 ------- src/intel/dev/intel_device_info.py | 31 ------- src/intel/vulkan/anv_physical_device.c | 113 +++++++++++++------------ 3 files changed, 58 insertions(+), 117 deletions(-) diff --git a/src/intel/dev/intel_device_info.c b/src/intel/dev/intel_device_info.c index dc0f18fbc78..11aede99c1d 100644 --- a/src/intel/dev/intel_device_info.c +++ b/src/intel/dev/intel_device_info.c @@ -530,35 +530,8 @@ static const struct intel_device_info intel_device_info_chv = { .simulator_id = 13, }; -#define CMAT_PRE_XEHP_CONFIGURATIONS \ - .cooperative_matrix_configurations = { \ - { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 }, \ - { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \ - { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \ - { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \ - } - -#define 
CMAT_XEHP_CONFIGURATIONS \ - .cooperative_matrix_configurations = { \ - { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \ - { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \ - { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \ - { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \ - } - -#define CMAT_XE2_CONFIGURATIONS \ - .cooperative_matrix_configurations = { \ - { INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 }, \ - { INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \ - { INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_BFLOAT16 }, \ - { INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_BFLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \ - { INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \ - { INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \ - } - #define GFX9_FEATURES \ GFX8_FEATURES, \ - CMAT_PRE_XEHP_CONFIGURATIONS, \ .ver = 9, \ .has_sample_with_hiz = true, \ .timestamp_frequency = 12000000 @@ -1062,7 +1035,6 @@ static const struct intel_device_info intel_device_info_sg1 = { #define XEHP_FEATURES \ GFX12_FEATURES, \ - CMAT_XEHP_CONFIGURATIONS, \ .verx10 = 125, \ .has_lsc = true, \ .has_llc = false, \ @@ -1131,7 +1103,6 @@ static const struct intel_device_info intel_device_info_atsm_g11 = { #define MTL_CONFIG(platform_suffix) \ XEHP_FEATURES, XEHP_PLACEHOLDER_THREADS_AND_URB, \ - CMAT_PRE_XEHP_CONFIGURATIONS, \ .platform = 
INTEL_PLATFORM_ ## platform_suffix, \ .has_64bit_float = true, \ .has_64bit_float_via_math_pipe = true, \ @@ -1167,12 +1138,10 @@ static const struct intel_device_info intel_device_info_arl_h = { .has_bfloat16 = true, /* BSpec 55414 (r53716). */ .has_systolic = true, - CMAT_XEHP_CONFIGURATIONS, }; #define XE2_FEATURES \ XEHP_FEATURES, \ - CMAT_XE2_CONFIGURATIONS, \ .ver = 20, \ .verx10 = 200, \ .grf_size = 64, \ diff --git a/src/intel/dev/intel_device_info.py b/src/intel/dev/intel_device_info.py index fdb3623d62c..f5b8dcf4cc1 100644 --- a/src/intel/dev/intel_device_info.py +++ b/src/intel/dev/intel_device_info.py @@ -159,19 +159,6 @@ Struct("intel_device_info_pat_entry", This tells KMD what caching mode the CPU mapping should use. It has nothing to do with any PAT cache modes."""))]) -Enum("intel_cmat_scope", - [EnumValue("INTEL_CMAT_SCOPE_NONE", value=0), - "INTEL_CMAT_SCOPE_SUBGROUP"]) - -Enum("intel_cooperative_matrix_component_type", - ["INTEL_CMAT_FLOAT16", - "INTEL_CMAT_FLOAT32", - "INTEL_CMAT_SINT32", - "INTEL_CMAT_SINT8", - "INTEL_CMAT_UINT32", - "INTEL_CMAT_UINT8", - "INTEL_CMAT_BFLOAT16"]) - Enum("intel_engine_class", ["INTEL_ENGINE_CLASS_RENDER", "INTEL_ENGINE_CLASS_COPY", @@ -180,22 +167,6 @@ Enum("intel_engine_class", "INTEL_ENGINE_CLASS_COMPUTE", "INTEL_ENGINE_CLASS_INVALID"]) -Struct("intel_cooperative_matrix_configuration", - [Member("intel_cmat_scope", "scope", - comment=dedent("""\ - Matrix A is MxK. - Matrix B is KxN. - Matrix C and Matrix Result are MxN. 
- - Result = A * B + C;""")), - Member("uint8_t", "m"), - Member("uint8_t", "n"), - Member("uint8_t", "k"), - Member("intel_cooperative_matrix_component_type", "a"), - Member("intel_cooperative_matrix_component_type", "b"), - Member("intel_cooperative_matrix_component_type", "c"), - Member("intel_cooperative_matrix_component_type", "result")]) - Enum("intel_kmd_type", ["INTEL_KMD_TYPE_INVALID", "INTEL_KMD_TYPE_I915", @@ -485,8 +456,6 @@ Struct("intel_device_info", Member("bool", "probe_forced", comment="Device needed INTEL_FORCE_PROBE"), Member("intel_device_info_mem_desc", "mem"), Member("intel_device_info_pat_desc", "pat"), - Member("intel_cooperative_matrix_configuration", - "cooperative_matrix_configurations", array=16), Member("bool", "is_virtio")] ) diff --git a/src/intel/vulkan/anv_physical_device.c b/src/intel/vulkan/anv_physical_device.c index 1482dedd5b2..a725788f39f 100644 --- a/src/intel/vulkan/anv_physical_device.c +++ b/src/intel/vulkan/anv_physical_device.c @@ -2885,8 +2885,8 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, goto fail_base; device->has_cooperative_matrix = - (device->info.has_systolic || debug_get_bool_option("INTEL_LOWER_DPAS", false)) && - device->info.cooperative_matrix_configurations[0].scope != INTEL_CMAT_SCOPE_NONE && + (device->info.has_systolic || + debug_get_bool_option("INTEL_LOWER_DPAS", false)) && !intel_use_jay_any_stage(&device->info); /* Because of Xe2 PAT selected compression and the Vulkan spec requirement @@ -3418,28 +3418,66 @@ VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR( return vk_outarray_status(&out); } -static VkComponentTypeKHR -convert_component_type(enum intel_cooperative_matrix_component_type t) +static void +anv_fill_all_cooperative_matrix_props(const struct anv_physical_device *pdevice, struct __vk_outarray *base, + void (*fill_cb)(struct __vk_outarray *base, unsigned exec_size, + VkComponentTypeKHR a_type, VkComponentTypeKHR b_type, + VkComponentTypeKHR c_type, 
VkComponentTypeKHR r_type, + unsigned ops_per_chan, bool saturate)) { - switch (t) { - case INTEL_CMAT_FLOAT16: return VK_COMPONENT_TYPE_FLOAT16_KHR; - case INTEL_CMAT_FLOAT32: return VK_COMPONENT_TYPE_FLOAT32_KHR; - case INTEL_CMAT_SINT32: return VK_COMPONENT_TYPE_SINT32_KHR; - case INTEL_CMAT_SINT8: return VK_COMPONENT_TYPE_SINT8_KHR; - case INTEL_CMAT_UINT32: return VK_COMPONENT_TYPE_UINT32_KHR; - case INTEL_CMAT_UINT8: return VK_COMPONENT_TYPE_UINT8_KHR; - case INTEL_CMAT_BFLOAT16: return VK_COMPONENT_TYPE_BFLOAT16_KHR; + const struct intel_device_info *devinfo = &pdevice->info; + if (!pdevice->has_cooperative_matrix) + return; + + const bool emulated = debug_get_bool_option("INTEL_LOWER_DPAS", false); + const unsigned exec_size = devinfo->ver >= 20 ? 16 : 8; + +#define FILL(a_type, b_type, c_type, r_type, ops_per_chan, sat) \ + fill_cb(base, exec_size, \ + VK_COMPONENT_TYPE_##a_type##_KHR, \ + VK_COMPONENT_TYPE_##b_type##_KHR, \ + VK_COMPONENT_TYPE_##c_type##_KHR, \ + VK_COMPONENT_TYPE_##r_type##_KHR, \ + ops_per_chan, sat) + + /* Note: XeHP doesn't have this configuration. 
*/ + if (devinfo->ver >= 20 || emulated) + FILL(FLOAT16, FLOAT16, FLOAT16, FLOAT16, 2, false); + + FILL(FLOAT16, FLOAT16, FLOAT32, FLOAT32, 2, false); + + if (devinfo->has_bfloat16 && !emulated) { + if (devinfo->ver >= 20) + FILL(BFLOAT16, BFLOAT16, BFLOAT16, BFLOAT16, 2, false); + FILL(BFLOAT16, BFLOAT16, FLOAT32, FLOAT32, 2, false); } - UNREACHABLE("invalid cooperative matrix component type in configuration"); + + FILL(SINT8, SINT8, SINT32, SINT32, 4, false); + FILL(UINT8, UINT8, UINT32, UINT32, 4, false); + +#undef FILL } -static VkScopeKHR -convert_scope(enum intel_cmat_scope scope) +static void +anv_fill_cooperative_matrix_prop(struct __vk_outarray *base, unsigned exec_size, + VkComponentTypeKHR a_type, VkComponentTypeKHR b_type, + VkComponentTypeKHR c_type, VkComponentTypeKHR r_type, + unsigned ops_per_chan, bool saturate) { - switch (scope) { - case INTEL_CMAT_SCOPE_SUBGROUP: return VK_SCOPE_SUBGROUP_KHR; - default: - UNREACHABLE("invalid cooperative matrix scope in configuration"); + vk_outarray(VkCooperativeMatrixPropertiesKHR) *out = (void *)base; + + vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, out, p) + { + *p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, + .MSize = 8, + .NSize = exec_size, + .KSize = 8 * ops_per_chan, + .AType = a_type, + .BType = b_type, + .CType = c_type, + .ResultType = r_type, + .saturatingAccumulation = saturate, + .scope = VK_SCOPE_SUBGROUP_KHR}; } } @@ -3449,43 +3487,8 @@ VkResult anv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR( VkCooperativeMatrixPropertiesKHR* pProperties) { ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); - const struct intel_device_info *devinfo = &pdevice->info; - VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount); - - if (!pdevice->has_cooperative_matrix) - return vk_outarray_status(&out); - - const bool emulated = debug_get_bool_option("INTEL_LOWER_DPAS", false); - - for (int 
i = 0; i < ARRAY_SIZE(devinfo->cooperative_matrix_configurations); i++) { - const struct intel_cooperative_matrix_configuration *cfg = - &devinfo->cooperative_matrix_configurations[i]; - - if (cfg->scope == INTEL_CMAT_SCOPE_NONE) - break; - - /* BFloat16 not supported by brw_lower_dpas emulation. */ - if (emulated && cfg->a == INTEL_CMAT_BFLOAT16) - continue; - - vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) { - prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR; - - prop->MSize = cfg->m; - prop->NSize = cfg->n; - prop->KSize = cfg->k; - - prop->AType = convert_component_type(cfg->a); - prop->BType = convert_component_type(cfg->b); - prop->CType = convert_component_type(cfg->c); - prop->ResultType = convert_component_type(cfg->result); - - prop->saturatingAccumulation = VK_FALSE; - prop->scope = convert_scope(cfg->scope); - } - } - + anv_fill_all_cooperative_matrix_props(pdevice, &out.base, anv_fill_cooperative_matrix_prop); return vk_outarray_status(&out); }