radv/gfx12+: enable VK_KHR_shader_bfloat16
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

GFX11 seems to have bfloat16 precision issues, so the extension is not enabled there for now.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34768>
This commit is contained in:
Georg Lehmann 2025-04-30 12:21:53 +02:00 committed by Marge Bot
parent a2209547db
commit 6f4e26e54d
3 changed files with 30 additions and 27 deletions

View file

@ -559,7 +559,7 @@ Khronos extensions that are not part of any Vulkan version:
VK_KHR_ray_tracing_maintenance1 DONE (anv/gfx12.5+, radv/gfx10.3+, tu/a740+, vn)
VK_KHR_ray_tracing_pipeline DONE (anv/gfx12.5+, lvp, radv/gfx10.3+, vn)
VK_KHR_ray_tracing_position_fetch DONE (anv, radv/gfx10.3+, vn)
VK_KHR_shader_bfloat16 DONE (anv/gfx12.5+)
VK_KHR_shader_bfloat16 DONE (anv/gfx12.5+, radv/gfx12+)
VK_KHR_shader_clock DONE (anv, hasvk, lvp, nvk, radv, tu, vn)
VK_KHR_shader_maximal_reconvergence DONE (anv, lvp, nvk, panvk/v10+, radv, vn)
VK_KHR_shader_relaxed_extended_instruction DONE (anv, hasvk, nvk, panvk, radv, tu, v3dv, vn)

View file

@ -19,5 +19,5 @@ VK_KHR_shader_terminate_invocation on panvk
VK_EXT_shader_demote_to_helper_invocation on panvk
VK_EXT_shader_replicated_composites on panvk
VK_EXT_depth_bias_control on panvk
VK_KHR_shader_bfloat16 on anv/gfx125+
VK_KHR_shader_bfloat16 on anv/gfx125+ and radv/gfx12+
VK_KHR_robustness2 on RADV

View file

@ -579,6 +579,7 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device
.KHR_sampler_ycbcr_conversion = true,
.KHR_separate_depth_stencil_layouts = true,
.KHR_shader_atomic_int64 = true,
.KHR_shader_bfloat16 = pdev->info.gfx_level >= GFX12, /* GFX11 has precision issues. */
.KHR_shader_clock = true,
.KHR_shader_draw_parameters = true,
.KHR_shader_expect_assume = true,
@ -1305,6 +1306,11 @@ radv_physical_device_get_features(const struct radv_physical_device *pdev, struc
/* VK_EXT_device_memory_report */
.deviceMemoryReport = true,
/* VK_KHR_shader_bfloat16 */
.shaderBFloat16Type = true,
.shaderBFloat16DotProduct = true,
.shaderBFloat16CooperativeMatrix = radv_cooperative_matrix_enabled(pdev),
};
}
@ -2845,34 +2851,31 @@ VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount,
VkCooperativeMatrixPropertiesKHR *pProperties)
{
VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount);
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
{
*p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
.MSize = 16,
.NSize = 16,
.KSize = 16,
.AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
.BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
.CType = VK_COMPONENT_TYPE_FLOAT16_KHR,
.ResultType = VK_COMPONENT_TYPE_FLOAT16_KHR,
.saturatingAccumulation = false,
.scope = VK_SCOPE_SUBGROUP_KHR};
}
for (unsigned bfloat = 0; bfloat < 2; bfloat++) {
for (unsigned fp32 = 0; fp32 < 2; fp32++) {
VkComponentTypeKHR ab_type = bfloat ? VK_COMPONENT_TYPE_BFLOAT16_KHR : VK_COMPONENT_TYPE_FLOAT16_KHR;
VkComponentTypeKHR cd_type = fp32 ? VK_COMPONENT_TYPE_FLOAT32_KHR : ab_type;
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
{
*p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
.MSize = 16,
.NSize = 16,
.KSize = 16,
.AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
.BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
.CType = VK_COMPONENT_TYPE_FLOAT32_KHR,
.ResultType = VK_COMPONENT_TYPE_FLOAT32_KHR,
.saturatingAccumulation = false,
.scope = VK_SCOPE_SUBGROUP_KHR};
if (pdev->info.gfx_level < GFX12 && bfloat)
continue; /* BF16 isn't working precisely on GFX11. */
vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
{
*p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
.MSize = 16,
.NSize = 16,
.KSize = 16,
.AType = ab_type,
.BType = ab_type,
.CType = cd_type,
.ResultType = cd_type,
.saturatingAccumulation = false,
.scope = VK_SCOPE_SUBGROUP_KHR};
}
}
}
for (unsigned asigned = 0; asigned < 2; asigned++) {