diff --git a/docs/features.txt b/docs/features.txt index a09cae202dd..851f18df23a 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -651,6 +651,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_sample_locations DONE (anv, hasvk, hk, nvk, radv, tu/a650+, vn) VK_EXT_shader_atomic_float DONE (anv, hasvk, lvp, radv, vn) VK_EXT_shader_atomic_float2 DONE (anv, lvp, radv, vn) + VK_EXT_shader_float8 DONE (radv/gfx12+) VK_EXT_shader_image_atomic_int64 DONE (anv, nvk, radv, vn) VK_EXT_shader_object DONE (lvp, hk, nvk, radv) VK_EXT_shader_replicated_composites DONE (anv, dzn, hasvk, lvp, nvk, panvk, radv, tu, vn) diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt index e666ae5ecfa..ba9d3093cbb 100644 --- a/docs/relnotes/new_features.txt +++ b/docs/relnotes/new_features.txt @@ -55,3 +55,4 @@ CL_sRGBA and CL_sBGRA images cl_khr_extended_bit_ops VK_KHR_shader_clock on panvk GL_EXT_shader_realtime_clock on panfrost/v6+ +VK_EXT_shader_float8 on RADV (RDNA4+) diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index bc7657cbc92..b58ae86a3b3 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -706,6 +706,7 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device .EXT_shader_atomic_float = true, .EXT_shader_atomic_float2 = true, .EXT_shader_demote_to_helper_invocation = true, + .EXT_shader_float8 = pdev->info.gfx_level >= GFX12 && !pdev->use_llvm, .EXT_shader_image_atomic_int64 = true, .EXT_shader_module_identifier = true, .EXT_shader_object = !pdev->use_llvm && !(instance->debug_flags & RADV_DEBUG_NO_ESO), @@ -1330,6 +1331,10 @@ radv_physical_device_get_features(const struct radv_physical_device *pdev, struc /* VK_KHR_unified_layouts */ .unifiedImageLayouts = true, .unifiedImageLayoutsVideo = true, + + /* VK_EXT_shader_float8 */ + .shaderFloat8 = true, + .shaderFloat8CooperativeMatrix = 
radv_cooperative_matrix_enabled(pdev), }; } @@ -2893,6 +2898,30 @@ radv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDe VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice); VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount); + if (pdev->info.gfx_level >= GFX12) { + for (unsigned e5m2_a = 0; e5m2_a < 2; e5m2_a++) { + for (unsigned e5m2_b = 0; e5m2_b < 2; e5m2_b++) { + VkComponentTypeKHR a_type = e5m2_a ? VK_COMPONENT_TYPE_FLOAT8_E5M2_EXT : VK_COMPONENT_TYPE_FLOAT8_E4M3_EXT; + VkComponentTypeKHR b_type = e5m2_b ? VK_COMPONENT_TYPE_FLOAT8_E5M2_EXT : VK_COMPONENT_TYPE_FLOAT8_E4M3_EXT; + + vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p) + { + *p = (struct VkCooperativeMatrixPropertiesKHR){ + .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, + .MSize = 16, + .NSize = 16, + .KSize = 16, + .AType = a_type, + .BType = b_type, + .CType = VK_COMPONENT_TYPE_FLOAT32_KHR, + .ResultType = VK_COMPONENT_TYPE_FLOAT32_KHR, + .saturatingAccumulation = false, + .scope = VK_SCOPE_SUBGROUP_KHR}; + } + } + } + } + for (unsigned bfloat = 0; bfloat < 2; bfloat++) { for (unsigned fp32 = 0; fp32 < 2; fp32++) { VkComponentTypeKHR ab_type = bfloat ? VK_COMPONENT_TYPE_BFLOAT16_KHR : VK_COMPONENT_TYPE_FLOAT16_KHR;