From 014f4ce9855f0d893bac9d6153da0e316a61077d Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 3 Mar 2026 10:54:13 +0200 Subject: [PATCH] anv: add an analysis pass to detect compute shaders clearing data Applications often miss emitting barriers between a shader initializing data & another shader writing data in the same location afterward. This is very common for UAVs (see vkd3d-proton). Vkd3d-proton does a pretty good job at inserting missing barriers between UAV clears & writes. But some applications also have similar issues with custom shaders. Here we introduce an analysis pass that recognizes shaders doing clear/initialization. We'll use that information in the following commit to insert barriers after those shaders. Since Gfx12.5 our HW has become a lot more sensitive to those issues due to the introduction of an L1 untyped data cache that is not coherent across the shader units. On Gfx20+, typed data is also L1 cacheable, exposing even more issues. Signed-off-by: Lionel Landwerlin Cc: mesa-stable Reviewed-by: Ivan Briano (cherry picked from commit 13bf1a4008b90030e0973eb1edd6181f79b64074) Part-of: --- .pick_status.json | 2 +- src/intel/vulkan/anv_nir.h | 2 + .../vulkan/anv_nir_clear_shader_analysis.c | 98 +++++++++++++++++++ src/intel/vulkan/anv_private.h | 8 ++ src/intel/vulkan/anv_shader.c | 2 + src/intel/vulkan/anv_shader_compile.c | 6 ++ src/intel/vulkan/meson.build | 1 + 7 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 src/intel/vulkan/anv_nir_clear_shader_analysis.c diff --git a/.pick_status.json b/.pick_status.json index 9231c145688..60d7a855262 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -12044,7 +12044,7 @@ "description": "anv: add an analysis pass to detect compute shaders clearing data", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/intel/vulkan/anv_nir.h b/src/intel/vulkan/anv_nir.h index 
32101b2cd2c..7b5fc04be3b 100644 --- a/src/intel/vulkan/anv_nir.h +++ b/src/intel/vulkan/anv_nir.h @@ -160,6 +160,8 @@ void anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir, struct nir_shader *fs_nir, struct anv_device *device); +enum anv_pipeline_behavior anv_nir_clear_shader_analysis(nir_shader *shader); + #ifdef __cplusplus } #endif diff --git a/src/intel/vulkan/anv_nir_clear_shader_analysis.c b/src/intel/vulkan/anv_nir_clear_shader_analysis.c new file mode 100644 index 00000000000..c4b3d70acc7 --- /dev/null +++ b/src/intel/vulkan/anv_nir_clear_shader_analysis.c @@ -0,0 +1,98 @@ +/* Copyright © 2026 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "anv_private.h" +#include "anv_nir.h" + +#include "nir/nir_builder.h" + +/** + * This file implements an analysis pass to detect shaders we assume are + * clearing/initializing some memory. + * + * The criterion for such a shader is that every image, SSBO and global + * store writes a constant value and that it performs no atomics on those. + */ + +struct clear_state { + uint32_t n_image_store; + uint32_t n_image_store_const; + + uint32_t n_ssbo_store; + uint32_t n_ssbo_store_const; + + uint32_t n_global_store; + uint32_t n_global_store_const; +}; + +static bool +intrin_analysis(nir_builder *b, nir_intrinsic_instr *intrin, void *data) +{ + struct clear_state *state = data; + switch (intrin->intrinsic) { + case nir_intrinsic_image_store: + case nir_intrinsic_bindless_image_store: + state->n_image_store++; + if (nir_src_is_const(intrin->src[nir_get_io_data_src_number(intrin)])) + state->n_image_store_const++; + return true; + + case nir_intrinsic_image_atomic: + case nir_intrinsic_image_atomic_swap: + case nir_intrinsic_bindless_image_atomic: + case nir_intrinsic_bindless_image_atomic_swap: + state->n_image_store++; /* atomic: never counted as constant */ + return true; + + case nir_intrinsic_store_ssbo: + state->n_ssbo_store++; + if (nir_src_is_const(intrin->src[nir_get_io_data_src_number(intrin)])) + state->n_ssbo_store_const++; + return true; + + case nir_intrinsic_ssbo_atomic: + 
state->n_ssbo_store++; /* atomic: never counted as constant */ + return true; + + case nir_intrinsic_store_global: + state->n_global_store++; + if (nir_src_is_const(intrin->src[nir_get_io_data_src_number(intrin)])) + state->n_global_store_const++; + return true; + + case nir_intrinsic_global_atomic: + state->n_global_store++; /* atomic: never counted as constant */ + return true; + + default: + return false; + } +} + +enum anv_pipeline_behavior +anv_nir_clear_shader_analysis(nir_shader *shader) +{ + struct clear_state state = {}; + + nir_shader_intrinsics_pass(shader, intrin_analysis, nir_metadata_all, &state); + + /* Any non-constant store or atomic disables all inferred behavior. */ + if (state.n_image_store != state.n_image_store_const || + state.n_ssbo_store != state.n_ssbo_store_const || + state.n_global_store != state.n_global_store_const) + return 0; + + enum anv_pipeline_behavior behavior = 0; + if (state.n_image_store > 0 && + state.n_image_store == state.n_image_store_const) + behavior |= ANV_PIPELINE_BEHAVIOR_CLEAR_TYPED; + if (state.n_ssbo_store > 0 && + state.n_ssbo_store == state.n_ssbo_store_const) + behavior |= ANV_PIPELINE_BEHAVIOR_CLEAR_UNTYPED; + if (state.n_global_store > 0 && + state.n_global_store == state.n_global_store_const) + behavior |= ANV_PIPELINE_BEHAVIOR_CLEAR_UNTYPED; + + return behavior; +} diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index e9bb250030d..0b702c19bc1 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1169,6 +1169,11 @@ enum anv_pipeline_bind_mask { ANV_PIPELINE_BIND_MASK_USES_NUM_WORKGROUP = BITFIELD_BIT(8), }; +enum anv_pipeline_behavior { + ANV_PIPELINE_BEHAVIOR_CLEAR_UNTYPED = BITFIELD_BIT(0), + ANV_PIPELINE_BEHAVIOR_CLEAR_TYPED = BITFIELD_BIT(1), +}; + #define ANV_PIPELINE_BIND_MASK_SET(i) (ANV_PIPELINE_BIND_MASK_SET0 << i) struct anv_pipeline_bind_map { @@ -1194,6 +1199,9 @@ struct anv_pipeline_bind_map { /* Number of dynamic descriptor in each set */ uint8_t dynamic_descriptors[MAX_SETS]; + + /* Bitfield of inferred behavior of the 
shader (enum anv_pipeline_behavior) */ + uint8_t inferred_behavior; }; struct anv_push_descriptor_info { diff --git a/src/intel/vulkan/anv_shader.c b/src/intel/vulkan/anv_shader.c index 36d18a03456..8e24c1378c0 100644 --- a/src/intel/vulkan/anv_shader.c +++ b/src/intel/vulkan/anv_shader.c @@ -85,6 +85,7 @@ anv_shader_deserialize(struct vk_device *vk_device, blob_copy_bytes(blob, data.bind_map.push_ranges, sizeof(data.bind_map.push_ranges)); blob_copy_bytes(blob, data.bind_map.dynamic_descriptors, sizeof(data.bind_map.dynamic_descriptors)); + data.bind_map.inferred_behavior = blob_read_uint8(blob); if (blob->overrun) return vk_error(device, VK_ERROR_UNKNOWN); @@ -163,6 +164,7 @@ anv_shader_serialize(struct vk_device *device, sizeof(shader->bind_map.push_ranges)); blob_write_bytes(blob, shader->bind_map.dynamic_descriptors, sizeof(shader->bind_map.dynamic_descriptors)); + blob_write_uint8(blob, shader->bind_map.inferred_behavior); return !blob->out_of_memory; } diff --git a/src/intel/vulkan/anv_shader_compile.c b/src/intel/vulkan/anv_shader_compile.c index 3eac8e49863..a259bbbbe5d 100644 --- a/src/intel/vulkan/anv_shader_compile.c +++ b/src/intel/vulkan/anv_shader_compile.c @@ -1596,6 +1596,12 @@ anv_shader_lower_nir(struct anv_device *device, shader_data->push_desc_info.fully_promoted_ubo_descriptors = anv_nir_push_desc_ubo_fully_promoted( nir, set_layouts, set_layout_count, &shader_data->bind_map); + + /* Only detect clearing behavior for compute shaders, these are the only + * problematic cases we're aware of. 
+ */ + if (nir->info.stage == MESA_SHADER_COMPUTE) + shader_data->bind_map.inferred_behavior = anv_nir_clear_shader_analysis(nir); } static uint32_t diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index 7e5e7714494..101a36c7d32 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -175,6 +175,7 @@ libanv_files = files( 'anv_measure.h', 'anv_nir.h', 'anv_nir_apply_pipeline_layout.c', + 'anv_nir_clear_shader_analysis.c', 'anv_nir_compute_push_layout.c', 'anv_nir_lower_driver_values.c', 'anv_nir_lower_multiview.c',