From 1f5a9a5a7390f4d52ef9aa0aac41f0431068184d Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 26 May 2025 09:28:04 +0300 Subject: [PATCH] anv: add support for shaderStorageImageReadWithoutFormat through emulation The emulation is done through in-shader lowering and is only relevant for Gfx9, Gfx11 and Gfx12.0 platforms. Signed-off-by: Lionel Landwerlin Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5117 Reviewed-by: Caio Oliveira Part-of: --- src/intel/vulkan/anv_buffer_view.c | 2 ++ src/intel/vulkan/anv_descriptor_set.c | 8 +++++--- src/intel/vulkan/anv_formats.c | 8 +++++++- src/intel/vulkan/anv_instance.c | 3 +++ .../vulkan/anv_nir_apply_pipeline_layout.c | 17 +++++++++++++++++ src/intel/vulkan/anv_physical_device.c | 5 +++-- src/intel/vulkan/anv_pipeline.c | 5 +++++ src/intel/vulkan/anv_private.h | 6 ++++++ src/util/driconf.h | 4 ++++ 9 files changed, 52 insertions(+), 6 deletions(-) diff --git a/src/intel/vulkan/anv_buffer_view.c b/src/intel/vulkan/anv_buffer_view.c index 8b96383f829..833873d27e2 100644 --- a/src/intel/vulkan/anv_buffer_view.c +++ b/src/intel/vulkan/anv_buffer_view.c @@ -46,6 +46,8 @@ anv_CreateBufferView(VkDevice _device, format = anv_get_format_plane(device->physical, pCreateInfo->format, 0, VK_IMAGE_TILING_LINEAR); + view->format = format.isl_format; + const uint32_t format_bs = isl_format_get_layout(format.isl_format)->bpb / 8; const uint32_t align_range = align_down_npot_u32(view->vk.range, format_bs); diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index b037020ff0a..358cfdb666e 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -274,13 +274,13 @@ anv_descriptor_data_size(enum anv_descriptor_data data, unsigned sampler_size = 0; if (data & ANV_DESCRIPTOR_INDIRECT_SAMPLED_IMAGE) - surface_size += sizeof(struct anv_sampled_image_descriptor); + surface_size += align(sizeof(struct anv_sampled_image_descriptor), 8); if (data & ANV_DESCRIPTOR_INDIRECT_STORAGE_IMAGE) - 
surface_size += sizeof(struct anv_storage_image_descriptor); + surface_size += align(sizeof(struct anv_storage_image_descriptor), 8); if (data & ANV_DESCRIPTOR_INDIRECT_ADDRESS_RANGE) - surface_size += sizeof(struct anv_address_range_descriptor); + surface_size += align(sizeof(struct anv_address_range_descriptor), 8); if (data & ANV_DESCRIPTOR_SURFACE) surface_size += ANV_SURFACE_STATE_SIZE; @@ -2272,6 +2272,7 @@ anv_descriptor_set_write_image_view(struct anv_device *device, .tile_mode = image_view->image->planes[0].primary_surface.isl.tiling == ISL_TILING_LINEAR ? 0 : 0xffffffff, .row_pitch_B = image_view->image->planes[0].primary_surface.isl.row_pitch_B, .qpitch = image_view->image->planes[0].primary_surface.isl.array_pitch_el_rows, + .format = image_view->planes[0].isl.format, }; memcpy(desc_surface_map, &desc_data, sizeof(desc_data)); } else { @@ -2408,6 +2409,7 @@ anv_descriptor_set_write_buffer_view(struct anv_device *device, device->physical, buffer_view->storage.state), .image_address = anv_address_physical(buffer_view->address), /* tile_mode, row_pitch_B, qpitch = 0 */ + .format = buffer_view->format, }; memcpy(desc_map, &desc_data, sizeof(desc_data)); } diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index f651073a29b..c9ce85f7504 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -749,8 +749,14 @@ anv_get_image_format_features2(const struct anv_physical_device *physical_device /* Load/store is determined based on base format. This prevents RGB * formats from showing up as load/store capable. + * + * Typed write support maps directly to storage write without format. For + * storage read without format, either the HW can do it + * (isl_format_supports_typed_reads) or we do an in-shader conversion for + * formats accepted by isl_is_storage_image_format. 
*/ - if (isl_format_supports_typed_reads(devinfo, base_isl_format)) + if (isl_format_supports_typed_reads(devinfo, base_isl_format) || + (physical_device->instance->emulate_read_without_format && + isl_is_storage_image_format(devinfo, plane_format.isl_format))) flags |= VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT; if (isl_format_supports_typed_writes(devinfo, base_isl_format)) flags |= VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT; diff --git a/src/intel/vulkan/anv_instance.c b/src/intel/vulkan/anv_instance.c index 303e28c6efb..683cc205845 100644 --- a/src/intel/vulkan/anv_instance.c +++ b/src/intel/vulkan/anv_instance.c @@ -56,6 +56,7 @@ static const driOptionDescription anv_dri_options[] = { DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false) DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false) DRI_CONF_ANV_MESH_CONV_PRIM_ATTRS_TO_VERT_ATTRS(-2) + DRI_CONF_ANV_EMULATE_READ_WITHOUT_FORMAT(false) DRI_CONF_FORCE_VK_VENDOR() DRI_CONF_FAKE_SPARSE(false) DRI_CONF_CUSTOM_BORDER_COLORS_WITHOUT_FORMAT(!DETECT_OS_ANDROID) @@ -175,6 +176,8 @@ anv_init_dri_options(struct anv_instance *instance) driQueryOptionb(&instance->dri_options, "intel_enable_wa_14018912822"); instance->mesh_conv_prim_attrs_to_vert_attrs = driQueryOptioni(&instance->dri_options, "anv_mesh_conv_prim_attrs_to_vert_attrs"); + instance->emulate_read_without_format = + driQueryOptionb(&instance->dri_options, "anv_emulate_read_without_format"); instance->fp64_workaround_enabled = driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled"); instance->generated_indirect_threshold = diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 9825a94410d..d4c0ad5336f 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -1729,6 +1729,12 @@ lower_image_load_intel_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, offsetof(struct anv_storage_image_descriptor, qpitch), 1, 32, state); break; + 
case ISL_SURF_PARAM_FORMAT: + desc = build_load_descriptor_mem( + b, desc_addr, + offsetof(struct anv_storage_image_descriptor, format), + 1, 32, state); + break; default: unreachable("Invalid surface parameter"); } @@ -1783,6 +1789,17 @@ lower_image_load_intel_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, desc = nir_ishl_imm(b, desc, 2); break; } + case ISL_SURF_PARAM_FORMAT: { + nir_def *format_dword = build_load_descriptor_mem( + b, desc_addr, + RENDER_SURFACE_STATE_SurfaceFormat_start(devinfo) / 8, + 1, 32, state); + desc = nir_ubitfield_extract_imm( + b, format_dword, + RENDER_SURFACE_STATE_SurfaceFormat_start(devinfo) % 32, + RENDER_SURFACE_STATE_SurfaceFormat_bits(devinfo)); + break; + } default: unreachable("Invalid surface parameter"); } diff --git a/src/intel/vulkan/anv_physical_device.c b/src/intel/vulkan/anv_physical_device.c index 54221684f9b..1b1bdbe3feb 100644 --- a/src/intel/vulkan/anv_physical_device.c +++ b/src/intel/vulkan/anv_physical_device.c @@ -447,9 +447,10 @@ get_features(const struct anv_physical_device *pdevice, .shaderStorageImageExtendedFormats = true, .shaderStorageImageMultisample = false, /* Gfx12.5 has all the required format supported in HW for typed - * read/writes + * read/writes, on Gfx11 & Gfx12.0 we emulate for 3 formats. 
*/ - .shaderStorageImageReadWithoutFormat = pdevice->info.verx10 >= 125, + .shaderStorageImageReadWithoutFormat = pdevice->info.verx10 >= 125 || + pdevice->instance->emulate_read_without_format, .shaderStorageImageWriteWithoutFormat = true, .shaderUniformBufferArrayDynamicIndexing = true, .shaderSampledImageArrayDynamicIndexing = true, diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 721d96043fd..eeb1a9d34e8 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -656,6 +656,9 @@ anv_pipeline_hash_common(struct mesa_sha1 *ctx, const int spilling_rate = device->physical->compiler->spilling_rate; _mesa_sha1_update(ctx, &spilling_rate, sizeof(spilling_rate)); + + const bool erwf = device->physical->instance->emulate_read_without_format; + _mesa_sha1_update(ctx, &erwf, sizeof(erwf)); } static void @@ -1034,6 +1037,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline, */ .lower_loads = true, .lower_stores_64bit = true, + .lower_loads_without_formats = + pdevice->instance->emulate_read_without_format, }); if (lower_64bit_atomics) { diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 9bab297b5f2..8c98e33fde6 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1357,6 +1357,7 @@ struct anv_instance { bool enable_te_distribution; bool external_memory_implicit_sync; bool force_guc_low_latency; + bool emulate_read_without_format; /** * Workarounds for game bugs. 
@@ -2872,6 +2873,9 @@ struct anv_storage_image_descriptor { /** Image Q pitch (rows between array slices) */ uint32_t qpitch; + + /** Image Format (enum isl_format) */ + uint32_t format; }; /** Struct representing a address/range descriptor @@ -3154,6 +3158,8 @@ struct anv_buffer_state { struct anv_buffer_view { struct vk_buffer_view vk; + enum isl_format format; + struct anv_address address; struct anv_buffer_state general; diff --git a/src/util/driconf.h b/src/util/driconf.h index 3815ea3b991..bcad7608c02 100644 --- a/src/util/driconf.h +++ b/src/util/driconf.h @@ -812,6 +812,10 @@ DRI_CONF_OPT_B(anv_assume_full_subgroups_with_shared_memory, def, \ "Allow assuming full subgroups requirement for shaders using shared memory even when it's not specified explicitly") +#define DRI_CONF_ANV_EMULATE_READ_WITHOUT_FORMAT(def) \ + DRI_CONF_OPT_B(anv_emulate_read_without_format, def, \ + "Emulate shaderStorageImageReadWithoutFormat with shader conversions") + #define DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(def) \ DRI_CONF_OPT_B(anv_sample_mask_out_opengl_behaviour, def, \ "Ignore sample mask out when having single sampled target")