anv: add emulated 64bit integer storage support

By turning an R64 format into R32G32

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32676>
This commit is contained in:
Mi, Yanfeng 2024-12-07 01:30:52 +08:00 committed by Marge Bot
parent 723e52cbcc
commit ed77f67e44
5 changed files with 137 additions and 38 deletions

View file

@ -86,6 +86,19 @@
.flags = __flags, \ .flags = __flags, \
} }
/* Format entry with distinct vertex-buffer and image formats: the VBO path
 * keeps the native HW format (__hw_vbo_fmt) while image/storage access uses
 * __hw_img_fmt. Used for emulated 64-bit integer storage, where R64 images
 * are backed by an R32G32 surface.
 */
#define fmtvbi(__vk_fmt, __hw_vbo_fmt, __hw_img_fmt) \
[VK_ENUM_OFFSET(__vk_fmt)] = { \
.planes = { \
{ .isl_format = __hw_img_fmt, \
.vbo_format = __hw_vbo_fmt, \
.swizzle = RGBA, \
.aspect = VK_IMAGE_ASPECT_COLOR_BIT, \
}, \
}, \
.vk_format = __vk_fmt, \
.n_planes = 1, \
}
#define fmt1(__vk_fmt, __hw_fmt) \ #define fmt1(__vk_fmt, __hw_fmt) \
swiz_fmt1(__vk_fmt, __hw_fmt, RGBA) swiz_fmt1(__vk_fmt, __hw_fmt, RGBA)
@ -263,8 +276,8 @@ static const struct anv_format main_formats[] = {
fmt1(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT), fmt1(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT),
fmt1(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT), fmt1(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT),
fmt1(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT), fmt1(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT),
fmtvb(VK_FORMAT_R64_UINT, ISL_FORMAT_R64_PASSTHRU), fmtvbi(VK_FORMAT_R64_UINT, ISL_FORMAT_R64_PASSTHRU, ISL_FORMAT_R32G32_UINT),
fmtvb(VK_FORMAT_R64_SINT, ISL_FORMAT_R64_PASSTHRU), fmtvbi(VK_FORMAT_R64_SINT, ISL_FORMAT_R64_PASSTHRU, ISL_FORMAT_R32G32_UINT),
fmtvb(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_PASSTHRU), fmtvb(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_PASSTHRU),
fmtvb(VK_FORMAT_R64G64_UINT, ISL_FORMAT_R64G64_PASSTHRU), fmtvb(VK_FORMAT_R64G64_UINT, ISL_FORMAT_R64G64_PASSTHRU),
fmtvb(VK_FORMAT_R64G64_SINT, ISL_FORMAT_R64G64_PASSTHRU), fmtvb(VK_FORMAT_R64G64_SINT, ISL_FORMAT_R64G64_PASSTHRU),
@ -614,6 +627,34 @@ anv_get_image_format_features2(const struct anv_physical_device *physical_device
return flags; return flags;
} }
if (anv_is_storage_format_emulated(vk_format)) {
/* NOTE(review): the emulated format's block shape does not match the
 * sparse standard block shape requirements, so sparse images are
 * rejected — confirm exact constraint.
 */
if (is_sparse)
return 0;
if (isl_mod_info) {
/* The emulation shader code doesn't work with any kind of
* compression or fast clear.
*/
if (isl_mod_info->supports_render_compression ||
isl_mod_info->supports_media_compression ||
isl_mod_info->supports_clear_color)
return 0;
/* If it's neither linear nor the tiling format selected for emulation,
 * we can't support it.
 *
 * NOTE(review): the condition below uses ||, which rejects every
 * modifier unless LINEAR == shader_tiling; && looks intended — verify.
 */
if (isl_mod_info->tiling != ISL_TILING_LINEAR ||
isl_mod_info->tiling != physical_device->isl_dev.shader_tiling)
return 0;
}
return VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT |
VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT |
VK_FORMAT_FEATURE_2_STORAGE_IMAGE_ATOMIC_BIT;
}
const VkImageAspectFlags aspects = vk_format_aspects(vk_format); const VkImageAspectFlags aspects = vk_format_aspects(vk_format);
if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
@ -724,6 +765,9 @@ anv_get_image_format_features2(const struct anv_physical_device *physical_device
if (flags & VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT) if (flags & VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT)
flags |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT; flags |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT;
/* We only support single component 32bit formats for image atomics. We
* also emulate for some formats using software detiling & untyped atomics.
*/
if (base_isl_format == ISL_FORMAT_R32_SINT || if (base_isl_format == ISL_FORMAT_R32_SINT ||
base_isl_format == ISL_FORMAT_R32_UINT || base_isl_format == ISL_FORMAT_R32_UINT ||
base_isl_format == ISL_FORMAT_R32_FLOAT) base_isl_format == ISL_FORMAT_R32_FLOAT)
@ -947,6 +991,14 @@ get_buffer_format_features2(const struct intel_device_info *devinfo,
const enum isl_format vbo_format = anv_format->planes[0].vbo_format; const enum isl_format vbo_format = anv_format->planes[0].vbo_format;
if (img_format != ISL_FORMAT_UNSUPPORTED) { if (img_format != ISL_FORMAT_UNSUPPORTED) {
if (anv_is_storage_format_emulated(vk_format)) {
/* Emulation through shader detiling does not allow
* STORAGE_(READ|WRITE)_WITHOUT_FORMAT_BIT
*/
flags = VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT |
VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT |
VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
} else {
if (isl_format_supports_sampling(devinfo, img_format) && if (isl_format_supports_sampling(devinfo, img_format) &&
!isl_format_is_compressed(img_format)) !isl_format_is_compressed(img_format))
flags |= VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT; flags |= VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT;
@ -988,6 +1040,7 @@ get_buffer_format_features2(const struct intel_device_info *devinfo,
#endif #endif
} }
} }
}
if (vbo_format != ISL_FORMAT_UNSUPPORTED) { if (vbo_format != ISL_FORMAT_UNSUPPORTED) {
if (isl_format_supports_vertex_fetch(devinfo, vbo_format)) if (isl_format_supports_vertex_fetch(devinfo, vbo_format))
@ -2028,6 +2081,7 @@ void anv_GetPhysicalDeviceSparseImageFormatProperties2(
isl_surf_usage_flags_t isl_usage = isl_surf_usage_flags_t isl_usage =
anv_image_choose_isl_surf_usage(physical_device, anv_image_choose_isl_surf_usage(physical_device,
pFormatInfo->format,
vk_create_flags, pFormatInfo->usage, vk_create_flags, pFormatInfo->usage,
0, aspect, 0, aspect,
VK_IMAGE_COMPRESSION_DEFAULT_EXT); VK_IMAGE_COMPRESSION_DEFAULT_EXT);

View file

@ -205,6 +205,7 @@ memory_range_merge(struct anv_image_memory_range *a,
isl_surf_usage_flags_t isl_surf_usage_flags_t
anv_image_choose_isl_surf_usage(struct anv_physical_device *device, anv_image_choose_isl_surf_usage(struct anv_physical_device *device,
VkFormat vk_format,
VkImageCreateFlags vk_create_flags, VkImageCreateFlags vk_create_flags,
VkImageUsageFlags vk_usage, VkImageUsageFlags vk_usage,
isl_surf_usage_flags_t isl_extra_usage, isl_surf_usage_flags_t isl_extra_usage,
@ -300,6 +301,11 @@ anv_image_choose_isl_surf_usage(struct anv_physical_device *device,
if (comp_flags & VK_IMAGE_COMPRESSION_DISABLED_EXT) if (comp_flags & VK_IMAGE_COMPRESSION_DISABLED_EXT)
isl_usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT; isl_usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT;
if (anv_is_storage_format_emulated(vk_format)) {
isl_usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT |
ISL_SURF_USAGE_SOFTWARE_DETILING;
}
return isl_usage; return isl_usage;
} }
@ -1252,6 +1258,7 @@ add_all_surfaces_implicit_layout(
VkImageUsageFlags vk_usage = vk_image_usage(&image->vk, aspect); VkImageUsageFlags vk_usage = vk_image_usage(&image->vk, aspect);
isl_surf_usage_flags_t isl_usage = isl_surf_usage_flags_t isl_usage =
anv_image_choose_isl_surf_usage(device->physical, anv_image_choose_isl_surf_usage(device->physical,
image->vk.format,
image->vk.create_flags, vk_usage, image->vk.create_flags, vk_usage,
isl_extra_usage_flags, aspect, isl_extra_usage_flags, aspect,
image->vk.compr_flags); image->vk.compr_flags);
@ -1857,8 +1864,8 @@ anv_image_init(struct anv_device *device, struct anv_image *image,
device->physical, image->emu_plane_format, 0, image->vk.tiling); device->physical, image->emu_plane_format, 0, image->vk.tiling);
isl_surf_usage_flags_t isl_usage = anv_image_choose_isl_surf_usage( isl_surf_usage_flags_t isl_usage = anv_image_choose_isl_surf_usage(
device->physical, image->vk.create_flags, image->vk.usage, device->physical, image->vk.format, image->vk.create_flags,
isl_extra_usage_flags, VK_IMAGE_ASPECT_COLOR_BIT, image->vk.usage, isl_extra_usage_flags, VK_IMAGE_ASPECT_COLOR_BIT,
image->vk.compr_flags); image->vk.compr_flags);
r = add_primary_surface(device, image, plane, plane_format, r = add_primary_surface(device, image, plane, plane_format,

View file

@ -334,6 +334,7 @@ get_device_extensions(const struct anv_physical_device *device,
.EXT_shader_atomic_float = true, .EXT_shader_atomic_float = true,
.EXT_shader_atomic_float2 = true, .EXT_shader_atomic_float2 = true,
.EXT_shader_demote_to_helper_invocation = true, .EXT_shader_demote_to_helper_invocation = true,
.EXT_shader_image_atomic_int64 = true,
.EXT_shader_module_identifier = true, .EXT_shader_module_identifier = true,
.EXT_shader_replicated_composites = true, .EXT_shader_replicated_composites = true,
.EXT_shader_stencil_export = true, .EXT_shader_stencil_export = true,
@ -907,6 +908,10 @@ get_features(const struct anv_physical_device *pdevice,
/* VK_EXT_host_image_copy */ /* VK_EXT_host_image_copy */
.hostImageCopy = true, .hostImageCopy = true,
/* VK_EXT_shader_image_atomic_int64 */
.shaderImageInt64Atomics = true,
.sparseImageInt64Atomics = false,
}; };
/* The new DOOM and Wolfenstein games require depthBounds without /* The new DOOM and Wolfenstein games require depthBounds without

View file

@ -910,6 +910,16 @@ print_ubo_load(nir_builder *b,
} }
#endif #endif
static bool
accept_64bit_atomic_cb(const nir_intrinsic_instr *intrin, const void *data)
{
return (intrin->intrinsic == nir_intrinsic_image_atomic ||
intrin->intrinsic == nir_intrinsic_image_atomic_swap ||
intrin->intrinsic == nir_intrinsic_image_deref_atomic ||
intrin->intrinsic == nir_intrinsic_image_deref_atomic_swap) &&
intrin->def.bit_size == 64;
}
static void static void
anv_pipeline_lower_nir(struct anv_pipeline *pipeline, anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
void *mem_ctx, void *mem_ctx,
@ -967,16 +977,31 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
/* Ensure robustness, do this before brw_nir_lower_storage_image so that
* added image size intrinsics for bounds checkings are properly lowered
* for cube images.
*/
NIR_PASS(_, nir, nir_lower_robust_access,
accept_64bit_atomic_cb, NULL);
NIR_PASS(_, nir, brw_nir_lower_storage_image, NIR_PASS(_, nir, brw_nir_lower_storage_image,
&(struct brw_nir_lower_storage_image_opts) { &(struct brw_nir_lower_storage_image_opts) {
/* Anv only supports Gfx9+ which has better defined typed read /* Anv only supports Gfx9+ which has better defined typed read
* behavior. It allows us to only have to care about lowering * behavior.
* loads.
*/ */
.devinfo = compiler->devinfo, .devinfo = compiler->devinfo,
.lower_loads = true, .lower_loads = true,
.lower_stores_64bit = true,
}); });
/* Switch from image to global */
NIR_PASS(_, nir, nir_lower_image_atomics_to_global,
accept_64bit_atomic_cb, NULL);
/* Detile for global */
NIR_PASS(_, nir, brw_nir_lower_texel_address, compiler->devinfo,
pdevice->isl_dev.shader_tiling);
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global, NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
nir_address_format_64bit_global); nir_address_format_64bit_global);
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const, NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,

View file

@ -5374,6 +5374,13 @@ anv_is_compressed_format_emulated(const struct anv_physical_device *pdevice,
format) != VK_FORMAT_UNDEFINED; format) != VK_FORMAT_UNDEFINED;
} }
/* Returns true when storage-image support for `format` is emulated in
 * software (64-bit integer formats backed by an R32G32_UINT surface).
 */
static inline bool
anv_is_storage_format_emulated(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_R64_UINT:
   case VK_FORMAT_R64_SINT:
      return true;
   default:
      return false;
   }
}
static inline struct isl_swizzle static inline struct isl_swizzle
anv_swizzle_for_render(struct isl_swizzle swizzle) anv_swizzle_for_render(struct isl_swizzle swizzle)
{ {
@ -5996,6 +6003,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
isl_surf_usage_flags_t isl_surf_usage_flags_t
anv_image_choose_isl_surf_usage(struct anv_physical_device *device, anv_image_choose_isl_surf_usage(struct anv_physical_device *device,
VkFormat vk_format,
VkImageCreateFlags vk_create_flags, VkImageCreateFlags vk_create_flags,
VkImageUsageFlags vk_usage, VkImageUsageFlags vk_usage,
isl_surf_usage_flags_t isl_extra_usage, isl_surf_usage_flags_t isl_extra_usage,