mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 04:58:05 +02:00
anv: add an analysis pass to detect compute shaders clearing data
Applications often miss emitting barriers between a shader
initializing data & another shader writing data in the same location
afterward. This is very common for UAVs (see vkd3d-proton).
Vkd3d-proton does a pretty good job at inserting missing barriers
between UAV clears & writes. But some applications also have similar
issues with custom shaders. Here we introduce an analysis pass that
recognizes shaders doing clear/initialization. We'll use that
information in the following commit to insert barriers after those
shaders.
Since Gfx12.5 our HW has become a lot more sensitive to those issues
due to the introduction of an L1 untyped data cache that is not
coherent across the shader units. On Gfx20+, typed data is also L1
cacheable exposing even more issues.
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: mesa-stable
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
(cherry picked from commit 13bf1a4008)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40752>
This commit is contained in:
parent
ef136c2687
commit
014f4ce985
7 changed files with 118 additions and 1 deletions
|
|
@ -12044,7 +12044,7 @@
|
|||
"description": "anv: add an analysis pass to detect compute shaders clearing data",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -160,6 +160,8 @@ void anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
|
|||
struct nir_shader *fs_nir,
|
||||
struct anv_device *device);
|
||||
|
||||
enum anv_pipeline_behavior anv_nir_clear_shader_analysis(nir_shader *shader);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
98
src/intel/vulkan/anv_nir_clear_shader_analysis.c
Normal file
98
src/intel/vulkan/anv_nir_clear_shader_analysis.c
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
/* Copyright © 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "anv_private.h"
|
||||
#include "anv_nir.h"
|
||||
|
||||
#include "nir/nir_builder.h"
|
||||
|
||||
/**
|
||||
* This file implements an analysis pass to detect shaders we assume are
|
||||
* clearing/initializing some memory.
|
||||
*
|
||||
* The criterion for such a shader is that all memory store operations are
|
||||
* writing constant values.
|
||||
*/
|
||||
|
||||
/**
 * Per-shader write counters gathered by the clear-shader analysis.
 *
 * For each memory class there are two counters: the number of write
 * intrinsics seen (stores and atomics) and the number of those writes whose
 * data source is a constant.
 */
struct clear_state {
   /* Image writes (regular and bindless). */
   uint32_t n_image_store;
   uint32_t n_image_store_const;

   /* SSBO writes. */
   uint32_t n_ssbo_store;
   uint32_t n_ssbo_store_const;

   /* Global memory writes. */
   uint32_t n_global_store;
   uint32_t n_global_store_const;
};
|
||||
|
||||
static bool
|
||||
intrin_analysis(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
|
||||
{
|
||||
struct clear_state *state = data;
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_image_store:
|
||||
case nir_intrinsic_bindless_image_store:
|
||||
state->n_image_store++;
|
||||
if (nir_src_is_const(intrin->src[nir_get_io_data_src_number(intrin)]))
|
||||
state->n_image_store_const++;
|
||||
return true;
|
||||
|
||||
case nir_intrinsic_image_atomic:
|
||||
case nir_intrinsic_image_atomic_swap:
|
||||
case nir_intrinsic_bindless_image_atomic:
|
||||
case nir_intrinsic_bindless_image_atomic_swap:
|
||||
state->n_image_store++;
|
||||
return true;
|
||||
|
||||
case nir_intrinsic_store_ssbo:
|
||||
state->n_ssbo_store++;
|
||||
if (nir_src_is_const(intrin->src[nir_get_io_data_src_number(intrin)]))
|
||||
state->n_ssbo_store_const++;
|
||||
return true;
|
||||
|
||||
case nir_intrinsic_ssbo_atomic:
|
||||
state->n_ssbo_store++;
|
||||
return true;
|
||||
|
||||
case nir_intrinsic_store_global:
|
||||
state->n_global_store++;
|
||||
if (nir_src_is_const(intrin->src[nir_get_io_data_src_number(intrin)]))
|
||||
state->n_global_store_const++;
|
||||
return true;
|
||||
|
||||
case nir_intrinsic_global_atomic:
|
||||
state->n_global_store++;
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
enum anv_pipeline_behavior
|
||||
anv_nir_clear_shader_analysis(nir_shader *shader)
|
||||
{
|
||||
struct clear_state state = {};
|
||||
|
||||
nir_shader_intrinsics_pass(shader, intrin_analysis, nir_metadata_all, &state);
|
||||
|
||||
/* If something doesn't write a constant, assume no behavior. */
|
||||
if (state.n_image_store != state.n_image_store_const ||
|
||||
state.n_ssbo_store != state.n_ssbo_store_const ||
|
||||
state.n_global_store != state.n_global_store_const)
|
||||
return 0;
|
||||
|
||||
enum anv_pipeline_behavior behavior = 0;
|
||||
if (state.n_image_store > 0 &&
|
||||
state.n_image_store == state.n_image_store_const)
|
||||
behavior |= ANV_PIPELINE_BEHAVIOR_CLEAR_TYPED;
|
||||
if (state.n_ssbo_store > 0 &&
|
||||
state.n_ssbo_store == state.n_ssbo_store_const)
|
||||
behavior |= ANV_PIPELINE_BEHAVIOR_CLEAR_UNTYPED;
|
||||
if (state.n_global_store > 0 &&
|
||||
state.n_global_store == state.n_global_store_const)
|
||||
behavior |= ANV_PIPELINE_BEHAVIOR_CLEAR_UNTYPED;
|
||||
|
||||
return behavior;
|
||||
}
|
||||
|
|
@ -1169,6 +1169,11 @@ enum anv_pipeline_bind_mask {
|
|||
ANV_PIPELINE_BIND_MASK_USES_NUM_WORKGROUP = BITFIELD_BIT(8),
|
||||
};
|
||||
|
||||
enum anv_pipeline_behavior {
|
||||
ANV_PIPELINE_BEHAVIOR_CLEAR_UNTYPED = BITFIELD_BIT(0),
|
||||
ANV_PIPELINE_BEHAVIOR_CLEAR_TYPED = BITFIELD_BIT(1),
|
||||
};
|
||||
|
||||
#define ANV_PIPELINE_BIND_MASK_SET(i) (ANV_PIPELINE_BIND_MASK_SET0 << i)
|
||||
|
||||
struct anv_pipeline_bind_map {
|
||||
|
|
@ -1194,6 +1199,9 @@ struct anv_pipeline_bind_map {
|
|||
|
||||
/* Number of dynamic descriptor in each set */
|
||||
uint8_t dynamic_descriptors[MAX_SETS];
|
||||
|
||||
/* Bitfield of inferred behavior of the shader (enum anv_pipeline_behavior) */
|
||||
uint8_t inferred_behavior;
|
||||
};
|
||||
|
||||
struct anv_push_descriptor_info {
|
||||
|
|
|
|||
|
|
@ -85,6 +85,7 @@ anv_shader_deserialize(struct vk_device *vk_device,
|
|||
blob_copy_bytes(blob, data.bind_map.push_ranges, sizeof(data.bind_map.push_ranges));
|
||||
blob_copy_bytes(blob, data.bind_map.dynamic_descriptors,
|
||||
sizeof(data.bind_map.dynamic_descriptors));
|
||||
data.bind_map.inferred_behavior = blob_read_uint8(blob);
|
||||
|
||||
if (blob->overrun)
|
||||
return vk_error(device, VK_ERROR_UNKNOWN);
|
||||
|
|
@ -163,6 +164,7 @@ anv_shader_serialize(struct vk_device *device,
|
|||
sizeof(shader->bind_map.push_ranges));
|
||||
blob_write_bytes(blob, shader->bind_map.dynamic_descriptors,
|
||||
sizeof(shader->bind_map.dynamic_descriptors));
|
||||
blob_write_uint8(blob, shader->bind_map.inferred_behavior);
|
||||
|
||||
return !blob->out_of_memory;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1596,6 +1596,12 @@ anv_shader_lower_nir(struct anv_device *device,
|
|||
shader_data->push_desc_info.fully_promoted_ubo_descriptors =
|
||||
anv_nir_push_desc_ubo_fully_promoted(
|
||||
nir, set_layouts, set_layout_count, &shader_data->bind_map);
|
||||
|
||||
/* Only detect clearing compute shaders; these are the only problematic
|
||||
* cases we're aware of.
|
||||
*/
|
||||
if (nir->info.stage == MESA_SHADER_COMPUTE)
|
||||
shader_data->bind_map.inferred_behavior = anv_nir_clear_shader_analysis(nir);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
|
|
|
|||
|
|
@ -175,6 +175,7 @@ libanv_files = files(
|
|||
'anv_measure.h',
|
||||
'anv_nir.h',
|
||||
'anv_nir_apply_pipeline_layout.c',
|
||||
'anv_nir_clear_shader_analysis.c',
|
||||
'anv_nir_compute_push_layout.c',
|
||||
'anv_nir_lower_driver_values.c',
|
||||
'anv_nir_lower_multiview.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue