From 0daffc79b07c367392cbd4226650a8a3d1ec1e64 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 25 Nov 2025 12:25:53 +0100 Subject: [PATCH] nvk: reorder exposed coop matrix types The specification says that the fastest types should come first. So we order the biggest matrix sizes first, because they generally have lower overhead in terms of address calculation and provide more performance. This also makes NVK more aligned with Nvidia's implementation. Acked-by: Mel Henning Part-of: --- src/nouveau/vulkan/nvk_physical_device.c | 152 +++++++++++------------ 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/src/nouveau/vulkan/nvk_physical_device.c b/src/nouveau/vulkan/nvk_physical_device.c index 72cfe185a67..8cecec6897e 100644 --- a/src/nouveau/vulkan/nvk_physical_device.c +++ b/src/nouveau/vulkan/nvk_physical_device.c @@ -1875,38 +1875,6 @@ nvk_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDev for (int use_result_f32 = 0; use_result_f32 < 2; use_result_f32++) { const VkComponentTypeKHR input_type_cd = use_result_f32 ? VK_COMPONENT_TYPE_FLOAT32_KHR : VK_COMPONENT_TYPE_FLOAT16_KHR; - vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p) - { - *p = (struct VkCooperativeMatrixPropertiesKHR){ - .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, - .MSize = 16, - .NSize = 8, - .KSize = 8, - .AType = VK_COMPONENT_TYPE_FLOAT16_KHR, - .BType = VK_COMPONENT_TYPE_FLOAT16_KHR, - .CType = input_type_cd, - .ResultType = input_type_cd, - .saturatingAccumulation = false, - .scope = VK_SCOPE_SUBGROUP_KHR - }; - } - - vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p) - { - *p = (struct VkCooperativeMatrixPropertiesKHR){ - .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, - .MSize = 16, - .NSize = 8, - .KSize = 16, - .AType = VK_COMPONENT_TYPE_FLOAT16_KHR, - .BType = VK_COMPONENT_TYPE_FLOAT16_KHR, - .CType = input_type_cd, - .ResultType = input_type_cd, - .saturatingAccumulation = false, - .scope = VK_SCOPE_SUBGROUP_KHR - }; - } - vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p) { *p = (struct VkCooperativeMatrixPropertiesKHR){ @@ -1922,6 +1890,38 @@ nvk_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDev .scope = VK_SCOPE_SUBGROUP_KHR }; } + + vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p) + { + *p = (struct VkCooperativeMatrixPropertiesKHR){ + .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, + .MSize = 16, + .NSize = 8, + .KSize = 16, + .AType = VK_COMPONENT_TYPE_FLOAT16_KHR, + .BType = VK_COMPONENT_TYPE_FLOAT16_KHR, + .CType = input_type_cd, + .ResultType = input_type_cd, + .saturatingAccumulation = false, + .scope = VK_SCOPE_SUBGROUP_KHR + }; + } + + vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p) + { + *p = (struct VkCooperativeMatrixPropertiesKHR){ + .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, + .MSize = 16, + .NSize = 8, + .KSize = 8, + .AType = VK_COMPONENT_TYPE_FLOAT16_KHR, + .BType = VK_COMPONENT_TYPE_FLOAT16_KHR, + .CType = input_type_cd, + .ResultType = input_type_cd, + .saturatingAccumulation = false, + .scope = VK_SCOPE_SUBGROUP_KHR + }; + } } } @@ -1936,22 +1936,36 @@ nvk_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDev if (result_type == VK_COMPONENT_TYPE_UINT32_KHR && sat) continue; - if (pdev->info.cls_compute < BLACKWELL_COMPUTE_A) { - vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p) - { - *p = (struct VkCooperativeMatrixPropertiesKHR){ - .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, - .MSize = 8, - .NSize = 8, - .KSize = 16, - .AType = input_type_ab, - .BType = input_type_ab, - .CType = result_type, - .ResultType = result_type, - .saturatingAccumulation = sat, + vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p) + { + *p = (struct VkCooperativeMatrixPropertiesKHR){ + .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, + .MSize = 16, + .NSize = 16, + .KSize = 32, + .AType = input_type_ab, + .BType = input_type_ab, + .CType = result_type, + .ResultType = result_type, + .saturatingAccumulation = sat, .scope = VK_SCOPE_SUBGROUP_KHR - }; - } + }; + } + + vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p) + { + *p = (struct VkCooperativeMatrixPropertiesKHR){ + .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, + .MSize = 16, + .NSize = 8, + .KSize = 32, + .AType = input_type_ab, + .BType = input_type_ab, + .CType = result_type, + .ResultType = result_type, + .saturatingAccumulation = sat, + .scope = VK_SCOPE_SUBGROUP_KHR + }; } if (pdev->info.cls_compute >= AMPERE_COMPUTE_A) { @@ -1972,36 +1986,22 @@ nvk_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDev } } - vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p) - { - *p = (struct VkCooperativeMatrixPropertiesKHR){ - .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, - .MSize = 16, - .NSize = 8, - .KSize = 32, - .AType = input_type_ab, - .BType = input_type_ab, - .CType = result_type, - .ResultType = result_type, - .saturatingAccumulation = sat, + if (pdev->info.cls_compute < BLACKWELL_COMPUTE_A) { + vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p) + { + *p = (struct VkCooperativeMatrixPropertiesKHR){ + .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, + .MSize = 8, + .NSize = 8, + .KSize = 16, + .AType = input_type_ab, + .BType = input_type_ab, + .CType = result_type, + .ResultType = result_type, + .saturatingAccumulation = sat, .scope = VK_SCOPE_SUBGROUP_KHR - }; - } - - vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p) - { - *p = (struct VkCooperativeMatrixPropertiesKHR){ - .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, - .MSize = 16, - .NSize = 16, - .KSize = 32, - .AType = input_type_ab, - .BType = input_type_ab, - .CType = result_type, - .ResultType = result_type, - .saturatingAccumulation = sat, - .scope = VK_SCOPE_SUBGROUP_KHR - }; + }; + } } } }