anv: add an analysis pass to detect compute shaders clearing data

Applications often omit barriers between a shader initializing data
and another shader that later writes to the same location. This is
very common for UAVs (see vkd3d-proton).

Vkd3d-proton does a pretty good job at inserting missing barriers
between UAV clears & writes. But some applications also have similar
issues with custom shaders. Here we introduce an analysis pass that
recognizes shaders doing clear/initialization. We'll use that
information in the following commit to insert barriers after those
shaders.

Since Gfx12.5 our HW has become a lot more sensitive to those issues
due to the introduction of an L1 untyped data cache that is not
coherent across the shader units. On Gfx20+, typed data is also L1
cacheable exposing even more issues.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: mesa-stable
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
(cherry picked from commit 13bf1a4008)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40752>
This commit is contained in:
Lionel Landwerlin 2026-03-03 10:54:13 +02:00 committed by Eric Engestrom
parent ef136c2687
commit 014f4ce985
7 changed files with 118 additions and 1 deletions

View file

@ -12044,7 +12044,7 @@
"description": "anv: add an analysis pass to detect compute shaders clearing data",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View file

@ -160,6 +160,8 @@ void anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
struct nir_shader *fs_nir,
struct anv_device *device);
/**
 * Analyze a shader and report whether it looks like a clear/initialization
 * shader.
 *
 * Returns a bitmask of enum anv_pipeline_behavior values, or 0 when no
 * behavior is inferred.
 */
enum anv_pipeline_behavior anv_nir_clear_shader_analysis(nir_shader *shader);
#ifdef __cplusplus
}
#endif

View file

@ -0,0 +1,98 @@
/* Copyright © 2026 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "anv_private.h"
#include "anv_nir.h"
#include "nir/nir_builder.h"
/**
* This file implements an analysis pass to detect shaders we assume are
* clearing/initializing some memory.
*
* The criterion for such a shader is that all memory store operations
* write constant values.
*/
/* Counters accumulated while walking the intrinsics of a shader.
 *
 * For each memory class, the first counter tracks every write seen (plain
 * stores and atomics) and the "_const" counter tracks only the plain stores
 * whose data source is a constant. A class is considered "clear-like" when
 * both counters match.
 */
struct clear_state {
/* Image stores and image atomics (bound and bindless). */
uint32_t n_image_store;
/* Image stores (bound and bindless) writing a constant value. */
uint32_t n_image_store_const;
/* SSBO stores and SSBO atomics. */
uint32_t n_ssbo_store;
/* SSBO stores writing a constant value. */
uint32_t n_ssbo_store_const;
/* Global memory stores and global atomics. */
uint32_t n_global_store;
/* Global memory stores writing a constant value. */
uint32_t n_global_store_const;
};
/* Per-intrinsic callback that classifies memory writes.
 *
 * `data` points to a struct clear_state. Every image/SSBO/global store or
 * atomic bumps the matching "total" counter; only plain stores whose data
 * source is constant also bump the matching "_const" counter. Atomics never
 * count as constant writes, so any atomic leaves the two counters unequal.
 *
 * Returns true for every intrinsic that was accounted for, false otherwise.
 * The shader itself is never modified and `b` is unused.
 */
static bool
intrin_analysis(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
{
   struct clear_state *counts = data;
   uint32_t *n_writes;
   uint32_t *n_const_writes;

   /* First, figure out which memory class this intrinsic touches. */
   switch (intrin->intrinsic) {
   case nir_intrinsic_image_store:
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_image_atomic_swap:
   case nir_intrinsic_bindless_image_atomic:
   case nir_intrinsic_bindless_image_atomic_swap:
      n_writes = &counts->n_image_store;
      n_const_writes = &counts->n_image_store_const;
      break;

   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_ssbo_atomic:
      n_writes = &counts->n_ssbo_store;
      n_const_writes = &counts->n_ssbo_store_const;
      break;

   case nir_intrinsic_store_global:
   case nir_intrinsic_global_atomic:
      n_writes = &counts->n_global_store;
      n_const_writes = &counts->n_global_store_const;
      break;

   default:
      /* Not a memory write we care about. */
      return false;
   }

   (*n_writes)++;

   /* Then, only plain stores can qualify as constant writes. */
   switch (intrin->intrinsic) {
   case nir_intrinsic_image_store:
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_global: {
      /* NOTE(review): presumably nir_get_io_data_src_number() yields the
       * index of the stored-value source for all of these intrinsics --
       * confirm it never returns a negative index here.
       */
      const int data_src = nir_get_io_data_src_number(intrin);
      if (nir_src_is_const(intrin->src[data_src]))
         (*n_const_writes)++;
      break;
   }

   default:
      /* Atomics: counted as writes, never as constant writes. */
      break;
   }

   return true;
}
/* Infer whether a shader behaves like a clear/initialization shader.
 *
 * Walks every intrinsic of `shader` with intrin_analysis() and compares, for
 * each memory class, the number of writes against the number of constant
 * stores.
 *
 * Returns 0 if any image/SSBO/global write is not a constant store (this
 * includes any atomic). Otherwise returns a bitmask with
 * ANV_PIPELINE_BEHAVIOR_CLEAR_TYPED set when the shader has image stores and
 * ANV_PIPELINE_BEHAVIOR_CLEAR_UNTYPED set when it has SSBO or global stores.
 */
enum anv_pipeline_behavior
anv_nir_clear_shader_analysis(nir_shader *shader)
{
   struct clear_state counts = {0};

   nir_shader_intrinsics_pass(shader, intrin_analysis, nir_metadata_all,
                              &counts);

   /* A single non-constant write in any class disqualifies the shader. */
   if (!(counts.n_image_store == counts.n_image_store_const &&
         counts.n_ssbo_store == counts.n_ssbo_store_const &&
         counts.n_global_store == counts.n_global_store_const))
      return 0;

   /* From here on every counted write is a constant store, so only the
    * presence of writes in each class needs to be checked.
    */
   enum anv_pipeline_behavior result = 0;

   if (counts.n_image_store > 0)
      result |= ANV_PIPELINE_BEHAVIOR_CLEAR_TYPED;

   if (counts.n_ssbo_store > 0 || counts.n_global_store > 0)
      result |= ANV_PIPELINE_BEHAVIOR_CLEAR_UNTYPED;

   return result;
}

View file

@ -1169,6 +1169,11 @@ enum anv_pipeline_bind_mask {
ANV_PIPELINE_BIND_MASK_USES_NUM_WORKGROUP = BITFIELD_BIT(8),
};
/* Shader behaviors inferred by analysis, combined as a bitmask. */
enum anv_pipeline_behavior {
/* The shader writes to SSBO or global memory, and every counted memory write
 * in the shader (image, SSBO, global) is a store of a constant value.
 */
ANV_PIPELINE_BEHAVIOR_CLEAR_UNTYPED = BITFIELD_BIT(0),
/* The shader writes to images, and every counted memory write in the shader
 * (image, SSBO, global) is a store of a constant value.
 */
ANV_PIPELINE_BEHAVIOR_CLEAR_TYPED = BITFIELD_BIT(1),
};
#define ANV_PIPELINE_BIND_MASK_SET(i) (ANV_PIPELINE_BIND_MASK_SET0 << i)
struct anv_pipeline_bind_map {
@ -1194,6 +1199,9 @@ struct anv_pipeline_bind_map {
/* Number of dynamic descriptor in each set */
uint8_t dynamic_descriptors[MAX_SETS];
/* Bitfield of inferred behavior of the shader (enum anv_pipeline_behavior) */
uint8_t inferred_behavior;
};
struct anv_push_descriptor_info {

View file

@ -85,6 +85,7 @@ anv_shader_deserialize(struct vk_device *vk_device,
blob_copy_bytes(blob, data.bind_map.push_ranges, sizeof(data.bind_map.push_ranges));
blob_copy_bytes(blob, data.bind_map.dynamic_descriptors,
sizeof(data.bind_map.dynamic_descriptors));
data.bind_map.inferred_behavior = blob_read_uint8(blob);
if (blob->overrun)
return vk_error(device, VK_ERROR_UNKNOWN);
@ -163,6 +164,7 @@ anv_shader_serialize(struct vk_device *device,
sizeof(shader->bind_map.push_ranges));
blob_write_bytes(blob, shader->bind_map.dynamic_descriptors,
sizeof(shader->bind_map.dynamic_descriptors));
blob_write_uint8(blob, shader->bind_map.inferred_behavior);
return !blob->out_of_memory;
}

View file

@ -1596,6 +1596,12 @@ anv_shader_lower_nir(struct anv_device *device,
shader_data->push_desc_info.fully_promoted_ubo_descriptors =
anv_nir_push_desc_ubo_fully_promoted(
nir, set_layouts, set_layout_count, &shader_data->bind_map);
/* Only detect clearing compute shaders; these are the only problematic
* cases we're aware of.
*/
if (nir->info.stage == MESA_SHADER_COMPUTE)
shader_data->bind_map.inferred_behavior = anv_nir_clear_shader_analysis(nir);
}
static uint32_t

View file

@ -175,6 +175,7 @@ libanv_files = files(
'anv_measure.h',
'anv_nir.h',
'anv_nir_apply_pipeline_layout.c',
'anv_nir_clear_shader_analysis.c',
'anv_nir_compute_push_layout.c',
'anv_nir_lower_driver_values.c',
'anv_nir_lower_multiview.c',