From ed77f67e445d5104c81b999bbc7cbafc75090838 Mon Sep 17 00:00:00 2001 From: "Mi, Yanfeng" Date: Sat, 7 Dec 2024 01:30:52 +0800 Subject: [PATCH] anv: add emulated 64bit integer storage support By turning a R64 into R32G32 Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Part-of: --- src/intel/vulkan/anv_formats.c | 122 ++++++++++++++++++------- src/intel/vulkan/anv_image.c | 11 ++- src/intel/vulkan/anv_physical_device.c | 5 + src/intel/vulkan/anv_pipeline.c | 29 +++++- src/intel/vulkan/anv_private.h | 8 ++ 5 files changed, 137 insertions(+), 38 deletions(-) diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index 4fab3725c8f..c005d8f7ab3 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -86,6 +86,19 @@ .flags = __flags, \ } +#define fmtvbi(__vk_fmt, __hw_vbo_fmt, __hw_img_fmt) \ + [VK_ENUM_OFFSET(__vk_fmt)] = { \ + .planes = { \ + { .isl_format = __hw_img_fmt, \ + .vbo_format = __hw_vbo_fmt, \ + .swizzle = RGBA, \ + .aspect = VK_IMAGE_ASPECT_COLOR_BIT, \ + }, \ + }, \ + .vk_format = __vk_fmt, \ + .n_planes = 1, \ + } + #define fmt1(__vk_fmt, __hw_fmt) \ swiz_fmt1(__vk_fmt, __hw_fmt, RGBA) @@ -263,8 +276,8 @@ static const struct anv_format main_formats[] = { fmt1(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT), fmt1(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT), fmt1(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT), - fmtvb(VK_FORMAT_R64_UINT, ISL_FORMAT_R64_PASSTHRU), - fmtvb(VK_FORMAT_R64_SINT, ISL_FORMAT_R64_PASSTHRU), + fmtvbi(VK_FORMAT_R64_UINT, ISL_FORMAT_R64_PASSTHRU, ISL_FORMAT_R32G32_UINT), + fmtvbi(VK_FORMAT_R64_SINT, ISL_FORMAT_R64_PASSTHRU, ISL_FORMAT_R32G32_UINT), fmtvb(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_PASSTHRU), fmtvb(VK_FORMAT_R64G64_UINT, ISL_FORMAT_R64G64_PASSTHRU), fmtvb(VK_FORMAT_R64G64_SINT, ISL_FORMAT_R64G64_PASSTHRU), @@ -614,6 +627,34 @@ anv_get_image_format_features2(const struct anv_physical_device *physical_device return flags; 
} + if (anv_is_storage_format_emulated(vk_format)) { + /* Somehow the block shape is not right */ + if (is_sparse) + return 0; + + if (isl_mod_info) { + /* The emulation shader code doesn't work with any kind of + * compression or fast clear. + */ + if (isl_mod_info->supports_render_compression || + isl_mod_info->supports_media_compression || + isl_mod_info->supports_clear_color) + return 0; + + /* If it's not linear or the selected tiling format for emulation we + * can't support it. + */ + if (isl_mod_info->tiling != ISL_TILING_LINEAR && + isl_mod_info->tiling != physical_device->isl_dev.shader_tiling) + return 0; + } + + return VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT | + VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT | + VK_FORMAT_FEATURE_2_STORAGE_IMAGE_ATOMIC_BIT; + } + const VkImageAspectFlags aspects = vk_format_aspects(vk_format); if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { @@ -724,6 +765,9 @@ anv_get_image_format_features2(const struct anv_physical_device *physical_device if (flags & VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT) flags |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT; + /* We only support single component 32bit formats for image atomics. We + * also emulate for some formats using software detiling & untyped atomics.
+ */ if (base_isl_format == ISL_FORMAT_R32_SINT || base_isl_format == ISL_FORMAT_R32_UINT || base_isl_format == ISL_FORMAT_R32_FLOAT) @@ -947,45 +991,54 @@ get_buffer_format_features2(const struct intel_device_info *devinfo, const enum isl_format vbo_format = anv_format->planes[0].vbo_format; if (img_format != ISL_FORMAT_UNSUPPORTED) { - if (isl_format_supports_sampling(devinfo, img_format) && - !isl_format_is_compressed(img_format)) - flags |= VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT; + if (anv_is_storage_format_emulated(vk_format)) { + /* Emulation through shader detiling does not allow + * STORAGE_(READ|WRITE)_WITHOUT_FORMAT_BIT + */ + flags = VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT | + VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT | + VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; + } else { + if (isl_format_supports_sampling(devinfo, img_format) && + !isl_format_is_compressed(img_format)) + flags |= VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT; - if (isl_is_storage_image_format(devinfo, img_format)) - flags |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT; + if (isl_is_storage_image_format(devinfo, img_format)) + flags |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT; - if (img_format == ISL_FORMAT_R32_SINT || img_format == ISL_FORMAT_R32_UINT) - flags |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; + if (img_format == ISL_FORMAT_R32_SINT || img_format == ISL_FORMAT_R32_UINT) + flags |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; - if (isl_format_supports_typed_reads(devinfo, img_format)) - flags |= VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT; - if (isl_format_supports_typed_writes(devinfo, img_format)) - flags |= VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT; + if (isl_format_supports_typed_reads(devinfo, img_format)) + flags |= VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT; + if (isl_format_supports_typed_writes(devinfo, img_format)) + flags |= VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT; - if 
(devinfo->has_ray_tracing) { + if (devinfo->has_ray_tracing) { #if ANV_SUPPORT_RT_GRL - switch (vk_format) { - case VK_FORMAT_R32G32_SFLOAT: - case VK_FORMAT_R32G32B32_SFLOAT: - case VK_FORMAT_R16G16_SFLOAT: - case VK_FORMAT_R16G16B16A16_SFLOAT: - case VK_FORMAT_R16G16_SNORM: - case VK_FORMAT_R16G16B16A16_SNORM: - case VK_FORMAT_R16G16B16A16_UNORM: - case VK_FORMAT_R16G16_UNORM: - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_R8G8_UNORM: - case VK_FORMAT_R8G8B8A8_SNORM: - case VK_FORMAT_R8G8_SNORM: - flags |= VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR; - break; - default: - break; - } + switch (vk_format) { + case VK_FORMAT_R32G32_SFLOAT: + case VK_FORMAT_R32G32B32_SFLOAT: + case VK_FORMAT_R16G16_SFLOAT: + case VK_FORMAT_R16G16B16A16_SFLOAT: + case VK_FORMAT_R16G16_SNORM: + case VK_FORMAT_R16G16B16A16_SNORM: + case VK_FORMAT_R16G16B16A16_UNORM: + case VK_FORMAT_R16G16_UNORM: + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_R8G8_UNORM: + case VK_FORMAT_R8G8B8A8_SNORM: + case VK_FORMAT_R8G8_SNORM: + flags |= VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR; + break; + default: + break; + } #else - if (vk_acceleration_struct_vtx_format_supported(vk_format)) - flags |= VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR; + if (vk_acceleration_struct_vtx_format_supported(vk_format)) + flags |= VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR; #endif + } } } @@ -2028,6 +2081,7 @@ void anv_GetPhysicalDeviceSparseImageFormatProperties2( isl_surf_usage_flags_t isl_usage = anv_image_choose_isl_surf_usage(physical_device, + pFormatInfo->format, vk_create_flags, pFormatInfo->usage, 0, aspect, VK_IMAGE_COMPRESSION_DEFAULT_EXT); diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index fc0ce7df5a5..541dcddb67c 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -205,6 +205,7 @@ memory_range_merge(struct anv_image_memory_range *a, isl_surf_usage_flags_t 
anv_image_choose_isl_surf_usage(struct anv_physical_device *device, + VkFormat vk_format, VkImageCreateFlags vk_create_flags, VkImageUsageFlags vk_usage, isl_surf_usage_flags_t isl_extra_usage, @@ -300,6 +301,11 @@ anv_image_choose_isl_surf_usage(struct anv_physical_device *device, if (comp_flags & VK_IMAGE_COMPRESSION_DISABLED_EXT) isl_usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT; + if (anv_is_storage_format_emulated(vk_format)) { + isl_usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT | + ISL_SURF_USAGE_SOFTWARE_DETILING; + } + return isl_usage; } @@ -1252,6 +1258,7 @@ add_all_surfaces_implicit_layout( VkImageUsageFlags vk_usage = vk_image_usage(&image->vk, aspect); isl_surf_usage_flags_t isl_usage = anv_image_choose_isl_surf_usage(device->physical, + image->vk.format, image->vk.create_flags, vk_usage, isl_extra_usage_flags, aspect, image->vk.compr_flags); @@ -1857,8 +1864,8 @@ anv_image_init(struct anv_device *device, struct anv_image *image, device->physical, image->emu_plane_format, 0, image->vk.tiling); isl_surf_usage_flags_t isl_usage = anv_image_choose_isl_surf_usage( - device->physical, image->vk.create_flags, image->vk.usage, - isl_extra_usage_flags, VK_IMAGE_ASPECT_COLOR_BIT, + device->physical, image->vk.format, image->vk.create_flags, + image->vk.usage, isl_extra_usage_flags, VK_IMAGE_ASPECT_COLOR_BIT, image->vk.compr_flags); r = add_primary_surface(device, image, plane, plane_format, diff --git a/src/intel/vulkan/anv_physical_device.c b/src/intel/vulkan/anv_physical_device.c index b2eda469415..37a20edcef1 100644 --- a/src/intel/vulkan/anv_physical_device.c +++ b/src/intel/vulkan/anv_physical_device.c @@ -334,6 +334,7 @@ get_device_extensions(const struct anv_physical_device *device, .EXT_shader_atomic_float = true, .EXT_shader_atomic_float2 = true, .EXT_shader_demote_to_helper_invocation = true, + .EXT_shader_image_atomic_int64 = true, .EXT_shader_module_identifier = true, .EXT_shader_replicated_composites = true, .EXT_shader_stencil_export = true, @@ -907,6 +908,10 
@@ get_features(const struct anv_physical_device *pdevice, /* VK_EXT_host_image_copy */ .hostImageCopy = true, + + /* VK_EXT_shader_image_atomic_int64 */ + .shaderImageInt64Atomics = true, + .sparseImageInt64Atomics = false, }; /* The new DOOM and Wolfenstein games require depthBounds without diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 43ff3f9fd21..6e5a04218cd 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -910,6 +910,16 @@ print_ubo_load(nir_builder *b, } #endif +static bool +accept_64bit_atomic_cb(const nir_intrinsic_instr *intrin, const void *data) +{ + return (intrin->intrinsic == nir_intrinsic_image_atomic || + intrin->intrinsic == nir_intrinsic_image_atomic_swap || + intrin->intrinsic == nir_intrinsic_image_deref_atomic || + intrin->intrinsic == nir_intrinsic_image_deref_atomic_swap) && + intrin->def.bit_size == 64; +} + static void anv_pipeline_lower_nir(struct anv_pipeline *pipeline, void *mem_ctx, @@ -967,16 +977,31 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline, nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + /* Ensure robustness, do this before brw_nir_lower_storage_image so that + * added image size intrinsics for bounds checkings are properly lowered + * for cube images. + */ + NIR_PASS(_, nir, nir_lower_robust_access, + accept_64bit_atomic_cb, NULL); + NIR_PASS(_, nir, brw_nir_lower_storage_image, &(struct brw_nir_lower_storage_image_opts) { /* Anv only supports Gfx9+ which has better defined typed read - * behavior. It allows us to only have to care about lowering - * loads. + * behavior. 
*/ .devinfo = compiler->devinfo, .lower_loads = true, + .lower_stores_64bit = true, }); + /* Switch from image to global */ + NIR_PASS(_, nir, nir_lower_image_atomics_to_global, + accept_64bit_atomic_cb, NULL); + + /* Detile for global */ + NIR_PASS(_, nir, brw_nir_lower_texel_address, compiler->devinfo, + pdevice->isl_dev.shader_tiling); + NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global, nir_address_format_64bit_global); NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 979cadfee10..07995cd080b 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -5374,6 +5374,13 @@ anv_is_compressed_format_emulated(const struct anv_physical_device *pdevice, format) != VK_FORMAT_UNDEFINED; } +static inline bool +anv_is_storage_format_emulated(VkFormat format) +{ + return format == VK_FORMAT_R64_SINT || + format == VK_FORMAT_R64_UINT; +} + static inline struct isl_swizzle anv_swizzle_for_render(struct isl_swizzle swizzle) { @@ -5996,6 +6003,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, isl_surf_usage_flags_t anv_image_choose_isl_surf_usage(struct anv_physical_device *device, + VkFormat vk_format, VkImageCreateFlags vk_create_flags, VkImageUsageFlags vk_usage, isl_surf_usage_flags_t isl_extra_usage,