From a9da8ec49bccd671436ea3af33b8eefe6ee73522 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Thu, 21 May 2026 11:02:41 +0200 Subject: [PATCH] nvk: enable VK_KHR_shader_fma This allows rusticl to make use of the native fma instructions giving us better OpenCL performance. e.g. ProjectPhysX_OpenCL-Benchmark on my GA102: FP32 0.610 -> 11.474 TFLOPs/s Reviewed-by: Mel Henning Part-of: --- docs/features.txt | 2 +- docs/relnotes/new_features.txt | 1 + src/nouveau/vulkan/nvk_physical_device.c | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/features.txt b/docs/features.txt index 51252e12888..91a960ae296 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -581,7 +581,7 @@ Khronos extensions that are not part of any Vulkan version: VK_KHR_shader_bfloat16 DONE (anv/gfx12.5+, radv/gfx12+, vn) VK_KHR_shader_clock DONE (anv, hasvk, lvp, nvk, panvk, radv, tu, vn) VK_KHR_shader_constant_data DONE (anv, radv) - VK_KHR_shader_fma DONE (kk, radv, vn) + VK_KHR_shader_fma DONE (kk, nvk, radv, vn) VK_KHR_shader_maximal_reconvergence DONE (anv, hk, kk, lvp, nvk, panvk/v10+, radv, vn) VK_KHR_shader_quad_control DONE (anv, hk, lvp, nvk, panvk/v10+, radv, vn) VK_KHR_shader_relaxed_extended_instruction DONE (anv, hasvk, hk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn) diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt index 8a578371886..1556e1a1839 100644 --- a/docs/relnotes/new_features.txt +++ b/docs/relnotes/new_features.txt @@ -16,3 +16,4 @@ OpenCL 3.1 support for rusticl on asahi, iris, radeonsi, llvmpipe and zink VK_KHR_workgroup_memory_explicit_layout on pvr VK_KHR_maintenance5 on pvr VK_KHR_shader_fma on RADV +VK_KHR_shader_fma on nvk diff --git a/src/nouveau/vulkan/nvk_physical_device.c b/src/nouveau/vulkan/nvk_physical_device.c index 4d36f49f45e..c8145055f77 100644 --- a/src/nouveau/vulkan/nvk_physical_device.c +++ b/src/nouveau/vulkan/nvk_physical_device.c @@ -184,6 +184,7 @@ nvk_get_device_extensions(const struct nvk_instance *instance, .KHR_shader_float_controls = true, .KHR_shader_float_controls2 = true, .KHR_shader_float16_int8 = true, + .KHR_shader_fma = true, .KHR_shader_integer_dot_product = true, .KHR_shader_maximal_reconvergence = true, .KHR_shader_non_semantic_info = true, @@ -772,6 +773,11 @@ nvk_get_device_features(const struct nv_device_info *info, .presentAtRelativeTime = true, .presentAtAbsoluteTime = true, #endif + + /* VK_KHR_shader_fma */ + .shaderFmaFloat16 = info->sm >= 70, + .shaderFmaFloat32 = true, + .shaderFmaFloat64 = true, }; }