diff --git a/docs/features.txt b/docs/features.txt index 6753222c43d..7459ac9c21f 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -683,7 +683,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_rgba10x6_formats DONE (panvk/v11+) VK_EXT_robustness2 DONE (anv, hasvk, hk, kk, lvp, nvk, panvk/v10+, pvr, radv, tu, v3dv, vn) VK_EXT_sample_locations DONE (anv, hasvk, hk, lvp, nvk, radv, tu/a650+, vn) - VK_EXT_shader_atomic_float DONE (anv, hasvk, kk, lvp, panvk, radv, tu, vn) + VK_EXT_shader_atomic_float DONE (anv, hasvk, kk, lvp, nvk, panvk, radv, tu, vn) VK_EXT_shader_atomic_float2 DONE (anv, lvp, radv, vn) VK_EXT_shader_float8 DONE (radv/gfx12+, vn) VK_EXT_shader_image_atomic_int64 DONE (anv, lvp, nvk, radv, tu/a740+, vn) diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt index eccf4343595..ef0cd3cf022 100644 --- a/docs/relnotes/new_features.txt +++ b/docs/relnotes/new_features.txt @@ -18,3 +18,4 @@ VK_KHR_maintenance5 on pvr VK_KHR_shader_fma on RADV VK_KHR_shader_fma on nvk Support for G1-Ultra, G1-Premium and G1-Pro GPUs on Panfrost and PanVK +VK_EXT_shader_atomic_float on nvk diff --git a/src/nouveau/compiler/nak/sm70_encode.rs b/src/nouveau/compiler/nak/sm70_encode.rs index 1b6761bf393..e33f26362f7 100644 --- a/src/nouveau/compiler/nak/sm70_encode.rs +++ b/src/nouveau/compiler/nak/sm70_encode.rs @@ -3634,8 +3634,9 @@ impl SM70Op for OpAtom { "64-bit Shared atomics only support CmpExch or Exch" ); assert!( - !self.atom_type.is_float(), - "Shared atomics don't support float" + !self.atom_type.is_float() + || self.atom_op == AtomOp::Add, + "Shared float atomics only support add" ); e.set_atom_op(87..91, self.atom_op); } diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index e39dfb186c7..766f9311d9a 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -986,8 +986,10 @@ atomic_supported(const nir_instr *instr, const void *data) { - /* Shared atomics don't 
support 64-bit arithmetic */ + /* Shared atomics don't support 64-bit arithmetic or 32-bit fadd */ const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_atomic_op atomic_op = nir_intrinsic_atomic_op(intr); return !(intr->intrinsic == nir_intrinsic_shared_atomic && - intr->def.bit_size == 64); + (intr->def.bit_size == 64 || + (intr->def.bit_size == 32 && atomic_op == nir_atomic_op_fadd))); } static unsigned diff --git a/src/nouveau/vulkan/nvk_format.c b/src/nouveau/vulkan/nvk_format.c index 73344ed94e4..42696d467f9 100644 --- a/src/nouveau/vulkan/nvk_format.c +++ b/src/nouveau/vulkan/nvk_format.c @@ -24,6 +24,7 @@ nvk_format_supports_atomics(const struct nvk_physical_device *pdev, switch (p_format) { case PIPE_FORMAT_R32_UINT: case PIPE_FORMAT_R32_SINT: + case PIPE_FORMAT_R32_FLOAT: return true; case PIPE_FORMAT_R64_UINT: case PIPE_FORMAT_R64_SINT: diff --git a/src/nouveau/vulkan/nvk_physical_device.c b/src/nouveau/vulkan/nvk_physical_device.c index 788501d99a7..b4c255c5e86 100644 --- a/src/nouveau/vulkan/nvk_physical_device.c +++ b/src/nouveau/vulkan/nvk_physical_device.c @@ -279,6 +279,7 @@ nvk_get_device_extensions(const struct nvk_instance *instance, .EXT_sampler_filter_minmax = info->cls_eng3d >= MAXWELL_B, .EXT_scalar_block_layout = true, .EXT_separate_stencil_usage = true, + .EXT_shader_atomic_float = true, .EXT_shader_image_atomic_int64 = info->cls_eng3d >= KEPLER_B, .EXT_shader_demote_to_helper_invocation = true, .EXT_shader_module_identifier = true, @@ -720,6 +721,20 @@ nvk_get_device_features(const struct nv_device_info *info, .robustImageAccess2 = true, .nullDescriptor = true, + /* VK_EXT_shader_atomic_float */ + .shaderBufferFloat32Atomics = true, + .shaderBufferFloat32AtomicAdd = true, + .shaderBufferFloat64Atomics = true, + .shaderBufferFloat64AtomicAdd = true, + .shaderSharedFloat32Atomics = true, + .shaderSharedFloat32AtomicAdd = true, + .shaderSharedFloat64Atomics = true, + .shaderSharedFloat64AtomicAdd = true, + .shaderImageFloat32Atomics = true, + .shaderImageFloat32AtomicAdd = true, + 
.sparseImageFloat32Atomics = true, + .sparseImageFloat32AtomicAdd = true, + /* VK_EXT_shader_image_atomic_int64 */ .shaderImageInt64Atomics = info->cls_eng3d >= KEPLER_B, .sparseImageInt64Atomics = info->cls_eng3d >= MAXWELL_A,