diff --git a/docs/features.txt b/docs/features.txt index c7e38b2e561..c6ef30118be 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -463,7 +463,7 @@ Vulkan 1.2 -- all DONE: anv, nvk, tu, vn VK_KHR_imageless_framebuffer DONE (anv, dzn, hasvk, lvp, nvk, radv, tu, v3dv, vn) VK_KHR_sampler_mirror_clamp_to_edge DONE (anv, dzn, hasvk, lvp, nvk, panvk, radv, tu, v3dv, vn) VK_KHR_separate_depth_stencil_layouts DONE (anv, dzn, hasvk, lvp, nvk, radv, tu, v3dv, vn) - VK_KHR_shader_atomic_int64 DONE (anv, lvp, nvk, radv, vn) + VK_KHR_shader_atomic_int64 DONE (anv, lvp, nvk, radv, vn, tu/a740+) VK_KHR_shader_float16_int8 DONE (anv, dzn, nvk, hasvk, lvp, radv, tu, vn) VK_KHR_shader_float_controls DONE (anv, dzn, hasvk, lvp, nvk, radv, tu, v3dv, vn) VK_KHR_shader_subgroup_extended_types DONE (anv, hasvk, lvp, nvk, radv, tu, vn) diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 41a206e2cab..d20c87b8fbd 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -941,6 +941,13 @@ ir3_mem_access_size_align(nir_intrinsic_op intrin, uint8_t bytes, }; } +static bool +atomic_supported(const nir_instr * instr, const void * data) +{ + /* Filter callback handed to nir_lower_atomics: treats instr as an intrinsic and returns true unless its destination is 64 bits wide; data is unused. No atomic 64b arithmetic is supported in A7XX so far. */ + return nir_instr_as_intrinsic(instr)->def.bit_size != 64; +} + void ir3_nir_lower_variant(struct ir3_shader_variant *so, const struct ir3_shader_nir_options *options, @@ -1034,6 +1041,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, /* Lower scratch writemasks */ progress |= OPT(s, nir_lower_wrmasks, should_split_wrmask, s); + progress |= OPT(s, nir_lower_atomics, atomic_supported); if (OPT(s, nir_lower_locals_to_regs, 1)) { progress = true; diff --git a/src/freedreno/ir3/ir3_nir_lower_64b.c b/src/freedreno/ir3/ir3_nir_lower_64b.c index fc52cb47429..c244c609f9f 100644 --- a/src/freedreno/ir3/ir3_nir_lower_64b.c +++ b/src/freedreno/ir3/ir3_nir_lower_64b.c @@ -4,6 +4,10 @@ */ #include "ir3_nir.h" +#include "nir.h" +#include "nir_builder.h" 
+#include "nir_builder_opcodes.h" +#include "nir_intrinsics.h" /* * Lowering for 64b intrinsics generated with OpenCL or with @@ -29,6 +33,13 @@ lower_64b_intrinsics_filter(const nir_instr *instr, const void *unused) if (is_intrinsic_store(intr->intrinsic)) return nir_src_bit_size(intr->src[0]) == 64; + /* skip over ssbo and global atomics, we'll lower them later */ + if (intr->intrinsic == nir_intrinsic_ssbo_atomic || + intr->intrinsic == nir_intrinsic_ssbo_atomic_swap || + intr->intrinsic == nir_intrinsic_global_atomic_ir3 || + intr->intrinsic == nir_intrinsic_global_atomic_swap_ir3) + return false; + if (nir_intrinsic_dest_components(intr) == 0) return false; diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 8205fa1c916..a6e284f11c0 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -203,6 +203,7 @@ get_device_extensions(const struct tu_physical_device *device, .KHR_sampler_mirror_clamp_to_edge = true, .KHR_sampler_ycbcr_conversion = true, .KHR_separate_depth_stencil_layouts = true, + .KHR_shader_atomic_int64 = device->info->a7xx.has_64b_ssbo_atomics, .KHR_shader_draw_parameters = true, .KHR_shader_expect_assume = true, .KHR_shader_float16_int8 = true, @@ -396,7 +397,8 @@ tu_get_features(struct tu_physical_device *pdevice, features->storageBuffer8BitAccess = pdevice->info->a7xx.storage_8bit; features->uniformAndStorageBuffer8BitAccess = false; features->storagePushConstant8 = false; - features->shaderBufferInt64Atomics = false; + features->shaderBufferInt64Atomics = + pdevice->info->a7xx.has_64b_ssbo_atomics; features->shaderSharedInt64Atomics = false; features->shaderFloat16 = true; features->shaderInt8 = true;