nvk: Advertise VK_KHR_cooperative_matrix

v2: advertise more int combinations (Karol)
    enable saturatingAccumulation for integer matrices (Karol)

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32777>
This commit is contained in:
Mary Guillemard 2024-12-24 14:09:59 +01:00 committed by Marge Bot
parent 9c511f7301
commit 669c8a5145
2 changed files with 142 additions and 0 deletions

View file

@ -71,3 +71,4 @@ removed pre-dmabuf wl_drm support
deprecated EGL_WL_bind_wayland_display deprecated EGL_WL_bind_wayland_display
VK_KHR_shader_atomic_int64 on panvk/v10+ VK_KHR_shader_atomic_int64 on panvk/v10+
VK_EXT_host_image_copy on RADV (RDNA1+) VK_EXT_host_image_copy on RADV (RDNA1+)
VK_KHR_cooperative_matrix on nvk/turing+

View file

@ -94,6 +94,7 @@ nvk_get_device_extensions(const struct nvk_instance *instance,
.KHR_buffer_device_address = true, .KHR_buffer_device_address = true,
.KHR_calibrated_timestamps = true, .KHR_calibrated_timestamps = true,
.KHR_compute_shader_derivatives = info->cls_eng3d >= TURING_A, .KHR_compute_shader_derivatives = info->cls_eng3d >= TURING_A,
.KHR_cooperative_matrix = info->cls_eng3d >= TURING_A,
.KHR_copy_commands2 = true, .KHR_copy_commands2 = true,
.KHR_create_renderpass2 = true, .KHR_create_renderpass2 = true,
.KHR_dedicated_allocation = true, .KHR_dedicated_allocation = true,
@ -287,6 +288,9 @@ nvk_get_device_features(const struct nv_device_info *info,
const struct vk_device_extension_table *supported_extensions, const struct vk_device_extension_table *supported_extensions,
struct vk_features *features) struct vk_features *features)
{ {
/* TU11x uses the same shader model as other Turing chips but doesn't support the same features. */
bool is_tu11x = info->chipset == 0x167 || info->chipset == 0x168;
*features = (struct vk_features) { *features = (struct vk_features) {
/* Vulkan 1.0 */ /* Vulkan 1.0 */
.robustBufferAccess = true, .robustBufferAccess = true,
@ -443,6 +447,11 @@ nvk_get_device_features(const struct nv_device_info *info,
.hostImageCopy = info->cls_eng3d >= TURING_A, .hostImageCopy = info->cls_eng3d >= TURING_A,
.pushDescriptor = true, .pushDescriptor = true,
/* VK_KHR_cooperative_matrix */
/* TU11x can run coop matrix but the performance is abysmal */
.cooperativeMatrix = info->cls_eng3d >= TURING_A && !is_tu11x,
.cooperativeMatrixRobustBufferAccess = false,
/* VK_KHR_compute_shader_derivatives */ /* VK_KHR_compute_shader_derivatives */
.computeDerivativeGroupQuads = info->cls_eng3d >= TURING_A, .computeDerivativeGroupQuads = info->cls_eng3d >= TURING_A,
.computeDerivativeGroupLinear = info->cls_eng3d >= TURING_A, .computeDerivativeGroupLinear = info->cls_eng3d >= TURING_A,
@ -965,6 +974,9 @@ nvk_get_device_properties(const struct nvk_instance *instance,
.defaultRobustnessImages = .defaultRobustnessImages =
VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT, VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
/* VK_KHR_cooperative_matrix */
.cooperativeMatrixSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT,
/* VK_KHR_compute_shader_derivatives */ /* VK_KHR_compute_shader_derivatives */
.meshAndTaskShaderDerivatives = false, .meshAndTaskShaderDerivatives = false,
@ -1708,3 +1720,132 @@ nvk_GetPhysicalDeviceFragmentShadingRatesKHR(
return vk_outarray_status(&out); return vk_outarray_status(&out);
} }
/*
 * Report the cooperative matrix configurations of a physical device.
 *
 * Entries are written through the vk_outarray helpers built on top of
 * pProperties / pPropertyCount, and the final result is whatever
 * vk_outarray_status() reports for that array.
 *
 * Devices whose compute class is older than Turing report no entries at all.
 * Turing and newer report, in this order:
 *   - FP16 A/B inputs, once with an FP16 C/Result and once with an FP32
 *     C/Result, each in three shapes, never saturating;
 *   - 8-bit integer A/B inputs with a 32-bit C/Result of the same signedness,
 *     each in three shapes, first without and then with saturating
 *     accumulation (the saturating variant is signed-only).
 *
 * Every entry uses subgroup scope.
 */
VKAPI_ATTR VkResult VKAPI_CALL
nvk_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount,
                                                    VkCooperativeMatrixPropertiesKHR *pProperties)
{
   struct coop_shape {
      uint32_t m, n, k;
   };

   /* M x N x K shapes for the FP16 input configurations, in report order. */
   static const struct coop_shape float_shapes[] = {
      { 16,  8,  8 },
      { 16,  8, 16 },
      { 16, 16, 16 },
   };

   /* M x N x K shapes for the 8-bit integer input configurations, in report order. */
   static const struct coop_shape int_shapes[] = {
      {  8,  8, 16 },
      { 16,  8, 32 },
      { 16, 16, 32 },
   };

   const unsigned num_float_shapes = sizeof(float_shapes) / sizeof(float_shapes[0]);
   const unsigned num_int_shapes = sizeof(int_shapes) / sizeof(int_shapes[0]);

   VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount);

   /* Nothing to report before Volta. */
   if (pdev->info.cls_compute < VOLTA_COMPUTE_A)
      return VK_SUCCESS;

   /* Every configuration below is only reported on Turing and newer
    * (IMMA got added with Turing); anything older leaves the array empty. */
   if (pdev->info.cls_compute < TURING_COMPUTE_A)
      return vk_outarray_status(&out);

   /* FP16 inputs: first with an FP16 accumulator, then with an FP32 one. */
   for (unsigned f32_acc = 0; f32_acc < 2; f32_acc++) {
      VkComponentTypeKHR acc_type = VK_COMPONENT_TYPE_FLOAT16_KHR;
      if (f32_acc)
         acc_type = VK_COMPONENT_TYPE_FLOAT32_KHR;

      for (unsigned s = 0; s < num_float_shapes; s++) {
         vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, props)
         {
            *props = (struct VkCooperativeMatrixPropertiesKHR){
               .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
               .MSize = float_shapes[s].m,
               .NSize = float_shapes[s].n,
               .KSize = float_shapes[s].k,
               .AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
               .BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
               .CType = acc_type,
               .ResultType = acc_type,
               .saturatingAccumulation = false,
               .scope = VK_SCOPE_SUBGROUP_KHR
            };
         }
      }
   }

   /* 8-bit integer inputs: non-saturating entries first, then saturating. */
   for (unsigned saturate = 0; saturate < 2; saturate++) {
      for (unsigned sign = 0; sign < 2; sign++) {
         /* we don't have hw support for uint32, so we can't saturate on C or D */
         if (saturate && !sign)
            continue;

         VkComponentTypeKHR ab_type = VK_COMPONENT_TYPE_UINT8_KHR;
         VkComponentTypeKHR acc_type = VK_COMPONENT_TYPE_UINT32_KHR;
         if (sign) {
            ab_type = VK_COMPONENT_TYPE_SINT8_KHR;
            acc_type = VK_COMPONENT_TYPE_SINT32_KHR;
         }

         for (unsigned s = 0; s < num_int_shapes; s++) {
            vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, props)
            {
               *props = (struct VkCooperativeMatrixPropertiesKHR){
                  .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
                  .MSize = int_shapes[s].m,
                  .NSize = int_shapes[s].n,
                  .KSize = int_shapes[s].k,
                  .AType = ab_type,
                  .BType = ab_type,
                  .CType = acc_type,
                  .ResultType = acc_type,
                  .saturatingAccumulation = saturate,
                  .scope = VK_SCOPE_SUBGROUP_KHR
               };
            }
         }
      }
   }

   return vk_outarray_status(&out);
}