nvk: reorder exposed coop matrix types
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

The specification says that the fastest types should come first, so we
list the largest matrix sizes first: they generally have lower overhead in
terms of address calculation and deliver better performance.

This also makes NVK more aligned with Nvidia's implementation.

Acked-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39614>
This commit is contained in:
Karol Herbst 2025-11-25 12:25:53 +01:00 committed by Marge Bot
parent 4ecd2b136f
commit 0daffc79b0

View file

@@ -1875,38 +1875,6 @@ nvk_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDev
for (int use_result_f32 = 0; use_result_f32 < 2; use_result_f32++) {
const VkComponentTypeKHR input_type_cd = use_result_f32 ? VK_COMPONENT_TYPE_FLOAT32_KHR : VK_COMPONENT_TYPE_FLOAT16_KHR;
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
{
*p = (struct VkCooperativeMatrixPropertiesKHR){
.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
.MSize = 16,
.NSize = 8,
.KSize = 8,
.AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
.BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
.CType = input_type_cd,
.ResultType = input_type_cd,
.saturatingAccumulation = false,
.scope = VK_SCOPE_SUBGROUP_KHR
};
}
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
{
*p = (struct VkCooperativeMatrixPropertiesKHR){
.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
.MSize = 16,
.NSize = 8,
.KSize = 16,
.AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
.BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
.CType = input_type_cd,
.ResultType = input_type_cd,
.saturatingAccumulation = false,
.scope = VK_SCOPE_SUBGROUP_KHR
};
}
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
{
*p = (struct VkCooperativeMatrixPropertiesKHR){
@@ -1922,6 +1890,38 @@ nvk_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDev
.scope = VK_SCOPE_SUBGROUP_KHR
};
}
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
{
*p = (struct VkCooperativeMatrixPropertiesKHR){
.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
.MSize = 16,
.NSize = 8,
.KSize = 16,
.AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
.BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
.CType = input_type_cd,
.ResultType = input_type_cd,
.saturatingAccumulation = false,
.scope = VK_SCOPE_SUBGROUP_KHR
};
}
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
{
*p = (struct VkCooperativeMatrixPropertiesKHR){
.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
.MSize = 16,
.NSize = 8,
.KSize = 8,
.AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
.BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
.CType = input_type_cd,
.ResultType = input_type_cd,
.saturatingAccumulation = false,
.scope = VK_SCOPE_SUBGROUP_KHR
};
}
}
}
@@ -1936,22 +1936,36 @@ nvk_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDev
if (result_type == VK_COMPONENT_TYPE_UINT32_KHR && sat)
continue;
if (pdev->info.cls_compute < BLACKWELL_COMPUTE_A) {
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
{
*p = (struct VkCooperativeMatrixPropertiesKHR){
.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
.MSize = 8,
.NSize = 8,
.KSize = 16,
.AType = input_type_ab,
.BType = input_type_ab,
.CType = result_type,
.ResultType = result_type,
.saturatingAccumulation = sat,
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
{
*p = (struct VkCooperativeMatrixPropertiesKHR){
.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
.MSize = 16,
.NSize = 16,
.KSize = 32,
.AType = input_type_ab,
.BType = input_type_ab,
.CType = result_type,
.ResultType = result_type,
.saturatingAccumulation = sat,
.scope = VK_SCOPE_SUBGROUP_KHR
};
}
};
}
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
{
*p = (struct VkCooperativeMatrixPropertiesKHR){
.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
.MSize = 16,
.NSize = 8,
.KSize = 32,
.AType = input_type_ab,
.BType = input_type_ab,
.CType = result_type,
.ResultType = result_type,
.saturatingAccumulation = sat,
.scope = VK_SCOPE_SUBGROUP_KHR
};
}
if (pdev->info.cls_compute >= AMPERE_COMPUTE_A) {
@@ -1972,36 +1986,22 @@ nvk_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDev
}
}
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
{
*p = (struct VkCooperativeMatrixPropertiesKHR){
.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
.MSize = 16,
.NSize = 8,
.KSize = 32,
.AType = input_type_ab,
.BType = input_type_ab,
.CType = result_type,
.ResultType = result_type,
.saturatingAccumulation = sat,
if (pdev->info.cls_compute < BLACKWELL_COMPUTE_A) {
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
{
*p = (struct VkCooperativeMatrixPropertiesKHR){
.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
.MSize = 8,
.NSize = 8,
.KSize = 16,
.AType = input_type_ab,
.BType = input_type_ab,
.CType = result_type,
.ResultType = result_type,
.saturatingAccumulation = sat,
.scope = VK_SCOPE_SUBGROUP_KHR
};
}
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
{
*p = (struct VkCooperativeMatrixPropertiesKHR){
.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
.MSize = 16,
.NSize = 16,
.KSize = 32,
.AType = input_type_ab,
.BType = input_type_ab,
.CType = result_type,
.ResultType = result_type,
.saturatingAccumulation = sat,
.scope = VK_SCOPE_SUBGROUP_KHR
};
};
}
}
}
}