anv: Add compute-only divergent atomics fusion optimization for Blender
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Blender uses atomic operations as part of its virtual shadow mapping
implementation. Virtual shadow mapping page tagging in compute shaders
benefits from divergent atomics fusion, while fragment shaders doing the
atomic raster step in general have worse performance with this
optimization turned on.
Thus, an option is added to only apply divergent atomics fusion to compute
shaders in ANV, and this option is enabled for Blender.

Initial support for divergent atomics fusion optimization in ANV was added
in https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40631.

Signed-off-by: Christoph Neuhauser <christoph.neuhauser@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41706>
This commit is contained in:
Christoph Neuhauser 2026-05-18 19:42:26 +02:00 committed by Marge Bot
parent 28f6a442c6
commit 7eba054c5b
5 changed files with 22 additions and 0 deletions

View file

@ -43,6 +43,7 @@ static const driOptionDescription anv_dri_options[] = {
DRI_CONF_ANV_FORCE_INDIRECT_DESCRIPTORS(false)
DRI_CONF_ANV_DISABLE_LINK_TIME_OPTIMIZATION(false)
DRI_CONF_ANV_ENABLE_OPT_DIVERGENT_ATOMICS(0)
DRI_CONF_ANV_ENABLE_OPT_DIVERGENT_ATOMICS_COMPUTE_ONLY(0)
DRI_CONF_ANV_BRW_DISABLE_SUBGROUP_SIZE_CONTROL(false)
DRI_CONF_SHADER_SPILLING_RATE(11)
DRI_CONFIG_INTEL_FORCE_COMPUTE_SURFACE_PREFETCH(true)
@ -318,6 +319,8 @@ anv_init_dri_options(struct anv_instance *instance)
driQueryOptionb(&instance->dri_options, "anv_disable_link_time_optimization");
instance->enable_opt_divergent_atomics =
driQueryOptioni(&instance->dri_options, "anv_enable_opt_divergent_atomics");
instance->enable_opt_divergent_atomics_compute_only =
driQueryOptioni(&instance->dri_options, "anv_enable_opt_divergent_atomics_compute_only");
instance->stack_ids = driQueryOptioni(&instance->dri_options, "intel_stack_id");
switch (instance->stack_ids) {

View file

@ -1813,6 +1813,7 @@ struct anv_instance {
bool disable_lto;
bool disable_push_constant_alloc;
enum brw_divergent_atomics_flags enable_opt_divergent_atomics;
enum brw_divergent_atomics_flags enable_opt_divergent_atomics_compute_only;
bool force_sampler_prefetch;
bool force_compute_surface_prefetch;
unsigned generated_indirect_threshold;

View file

@ -665,7 +665,14 @@ populate_cs_prog_key(struct brw_cs_prog_key *key,
const struct vk_physical_device *device,
const struct vk_pipeline_robustness_state *rs)
{
const struct anv_physical_device *pdevice =
container_of(device, const struct anv_physical_device, vk);
populate_base_prog_key(&key->base, device, rs);
key->base.divergent_atomics_flags |=
pdevice->instance->enable_opt_divergent_atomics_compute_only;
}
static void

View file

@ -1157,6 +1157,13 @@ TODO: document the other workarounds.
-->
<option name="anv_barrier_post_untyped_clear_shader" value="true" />
</application>
<application name="Blender" executable="blender">
<!-- Virtual shadow mapping page tagging in compute shaders
benefits from divergent atomics fusion, while fragment
shaders doing the atomic raster step in general have worse
performance with this optimization turned on.
The value is interpreted as brw_divergent_atomics_flags and is
only applied to compute shaders; the general
anv_enable_opt_divergent_atomics option is left untouched here. -->
<option name="anv_enable_opt_divergent_atomics_compute_only" value="1" />
</application>
<!-- Source2 games seem confused by the CCS-only memory type being
restricted to images, so allow it for buffers. More details on
the issue:

View file

@ -980,6 +980,10 @@
DRI_CONF_OPT_I(anv_enable_opt_divergent_atomics, def, 0, 3,\
"Enable fusion of divergent atomics (see brw_divergent_atomics_flags)")
/* Integer driconf option for ANV: divergent atomics fusion restricted to
 * compute shaders. ANV reads it with driQueryOptioni(), stores it as
 * brw_divergent_atomics_flags and ORs it into the compute program key only
 * (see populate_cs_prog_key), so it adds to whatever the base key already set.
 * `def` is the default value.
 * NOTE(review): the 0 and 3 arguments look like the allowed min/max range;
 * confirm against the DRI_CONF_OPT_I definition.
 */
#define DRI_CONF_ANV_ENABLE_OPT_DIVERGENT_ATOMICS_COMPUTE_ONLY(def) \
DRI_CONF_OPT_I(anv_enable_opt_divergent_atomics_compute_only, def, 0, 3,\
"Enable fusion of divergent atomics for compute shaders only (see brw_divergent_atomics_flags)")
/* driconf option `anv_brw_disable_subgroup_size_control` for ANV; `def` is
 * the default value.
 * NOTE(review): presumably a boolean option (DRI_CONF_OPT_B, instantiated
 * with `false` in anv_dri_options); confirm against the macro definition.
 */
#define DRI_CONF_ANV_BRW_DISABLE_SUBGROUP_SIZE_CONTROL(def) \
DRI_CONF_OPT_B(anv_brw_disable_subgroup_size_control, def, \
"Disable EXT_subgroup_size_control support when using brw compiler.")