From 6f4e26e54da860f2811bf88b49673ab2510320e5 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Wed, 30 Apr 2025 12:21:53 +0200 Subject: [PATCH] radv/gfx12+: enable VK_KHR_shader_bfloat16 GFX11 seems to have precision issues, so don't enable the extension there for now. Reviewed-by: Rhys Perry Part-of: --- docs/features.txt | 2 +- docs/relnotes/new_features.txt | 2 +- src/amd/vulkan/radv_physical_device.c | 53 ++++++++++++++------------- 3 files changed, 30 insertions(+), 27 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index eeb754596d4..607655b0348 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -559,7 +559,7 @@ Khronos extensions that are not part of any Vulkan version: VK_KHR_ray_tracing_maintenance1 DONE (anv/gfx12.5+, radv/gfx10.3+, tu/a740+, vn) VK_KHR_ray_tracing_pipeline DONE (anv/gfx12.5+, lvp, radv/gfx10.3+, vn) VK_KHR_ray_tracing_position_fetch DONE (anv, radv/gfx10.3+, vn) - VK_KHR_shader_bfloat16 DONE (anv/gfx12.5+) + VK_KHR_shader_bfloat16 DONE (anv/gfx12.5+, radv/gfx12+) VK_KHR_shader_clock DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_KHR_shader_maximal_reconvergence DONE (anv, lvp, nvk, panvk/v10+, radv, vn) VK_KHR_shader_relaxed_extended_instruction DONE (anv, hasvk, nvk, panvk, radv, tu, v3dv, vn) diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt index d6d95b0c209..30491ceed43 100644 --- a/docs/relnotes/new_features.txt +++ b/docs/relnotes/new_features.txt @@ -19,5 +19,5 @@ VK_KHR_shader_terminate_invocation on panvk VK_EXT_shader_demote_to_helper_invocation on panvk VK_EXT_shader_replicated_composites on panvk VK_EXT_depth_bias_control on panvk -VK_KHR_shader_bfloat16 on anv/gfx125+ +VK_KHR_shader_bfloat16 on anv/gfx125+ and radv/gfx12+ VK_KHR_robustness2 on RADV diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index 66730287842..ca8e695c89e 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -579,6 +579,7 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device .KHR_sampler_ycbcr_conversion = true, .KHR_separate_depth_stencil_layouts = true, .KHR_shader_atomic_int64 = true, + .KHR_shader_bfloat16 = pdev->info.gfx_level >= GFX12, /* GFX11 has precision issues. */ .KHR_shader_clock = true, .KHR_shader_draw_parameters = true, .KHR_shader_expect_assume = true, @@ -1305,6 +1306,11 @@ radv_physical_device_get_features(const struct radv_physical_device *pdev, struc /* VK_EXT_device_memory_report */ .deviceMemoryReport = true, + + /* VK_KHR_shader_bfloat16 */ + .shaderBFloat16Type = true, + .shaderBFloat16DotProduct = true, + .shaderBFloat16CooperativeMatrix = radv_cooperative_matrix_enabled(pdev), }; } @@ -2845,34 +2851,31 @@ VKAPI_ATTR VkResult VKAPI_CALL radv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount, VkCooperativeMatrixPropertiesKHR *pProperties) { + VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice); VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount); - vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p) - { - *p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, - .MSize = 16, - .NSize = 16, - .KSize = 16, - .AType = VK_COMPONENT_TYPE_FLOAT16_KHR, - .BType = VK_COMPONENT_TYPE_FLOAT16_KHR, - .CType = VK_COMPONENT_TYPE_FLOAT16_KHR, - .ResultType = VK_COMPONENT_TYPE_FLOAT16_KHR, - .saturatingAccumulation = false, - .scope = VK_SCOPE_SUBGROUP_KHR}; - } + for (unsigned bfloat = 0; bfloat < 2; bfloat++) { + for (unsigned fp32 = 0; fp32 < 2; fp32++) { + VkComponentTypeKHR ab_type = bfloat ? VK_COMPONENT_TYPE_BFLOAT16_KHR : VK_COMPONENT_TYPE_FLOAT16_KHR; + VkComponentTypeKHR cd_type = fp32 ? VK_COMPONENT_TYPE_FLOAT32_KHR : ab_type; - vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p) - { - *p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, - .MSize = 16, - .NSize = 16, - .KSize = 16, - .AType = VK_COMPONENT_TYPE_FLOAT16_KHR, - .BType = VK_COMPONENT_TYPE_FLOAT16_KHR, - .CType = VK_COMPONENT_TYPE_FLOAT32_KHR, - .ResultType = VK_COMPONENT_TYPE_FLOAT32_KHR, - .saturatingAccumulation = false, - .scope = VK_SCOPE_SUBGROUP_KHR}; + if (pdev->info.gfx_level < GFX12 && bfloat) + continue; /* BF16 isn't working precisely on GFX11. */ + + vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p) + { + *p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, + .MSize = 16, + .NSize = 16, + .KSize = 16, + .AType = ab_type, + .BType = ab_type, + .CType = cd_type, + .ResultType = cd_type, + .saturatingAccumulation = false, + .scope = VK_SCOPE_SUBGROUP_KHR}; + } + } } for (unsigned asigned = 0; asigned < 2; asigned++) {