anv: change some image qualifiers as coherent for Last Of Us
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

This fixes graphics artifacts happening with particular shader.

This 'heuristic' hits few very similar shaders but should provide better
performance than current fix to turn off caching from all shaders.

Cc: mesa-stable
Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35929>
This commit is contained in:
Tapani Pälli 2025-08-25 09:33:29 +03:00 committed by Marge Bot
parent 3f317348c2
commit 4035520ca9
5 changed files with 78 additions and 1 deletions

View file

@ -36,6 +36,7 @@ static const driOptionDescription anv_dri_options[] = {
DRI_CONFIG_INTEL_VF_DISTRIBUTION(true)
DRI_CONFIG_INTEL_TE_DISTRIBUTION(true)
DRI_CONFIG_INTEL_STORAGE_CACHE_POLICY_WT(false)
DRI_CONF_ANV_LARGE_WORKGROUP_NON_COHERENT_IMAGE_WORKAROUND(false)
DRI_CONF_ANV_COMPRESSION_CONTROL_ENABLED(false)
DRI_CONF_ANV_FAKE_NONLOCAL_MEMORY(false)
DRI_CONF_OPT_E(intel_stack_id, 512, 256, 2048,
@ -197,6 +198,8 @@ anv_init_dri_options(struct anv_instance *instance)
driQueryOptionb(&instance->dri_options, "intel_vf_distribution");
instance->enable_te_distribution =
driQueryOptionb(&instance->dri_options, "intel_te_distribution");
instance->large_workgroup_non_coherent_image_workaround =
driQueryOptionb(&instance->dri_options, "anv_large_workgroup_non_coherent_image_workaround");
instance->disable_fcv =
driQueryOptionb(&instance->dri_options, "anv_disable_fcv");
instance->enable_buffer_comp =

View file

@ -659,6 +659,10 @@ anv_pipeline_hash_common(struct mesa_sha1 *ctx,
const bool erwf = device->physical->instance->emulate_read_without_format;
_mesa_sha1_update(ctx, &erwf, sizeof(erwf));
const bool large_wg_wa =
device->physical->instance->large_workgroup_non_coherent_image_workaround;
_mesa_sha1_update(ctx, &large_wg_wa, sizeof(large_wg_wa));
}
static void
@ -942,6 +946,52 @@ build_tcs_input_vertices(nir_builder *b, nir_instr *instr, void *data)
return anv_load_driver_uniform(b, 1, gfx.tcs_input_vertices);
}
static void
fixup_large_workgroup_image_coherency(nir_shader *nir)
{
nir_foreach_function_impl(impl, nir) {
nir_foreach_block(block, impl) {
nir_foreach_instr(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic != nir_intrinsic_image_deref_store ||
nir_intrinsic_image_dim(intr) != GLSL_SAMPLER_DIM_3D)
continue;
/* We have found image store access to 3D. */
nir_deref_instr *array_deref = nir_src_as_deref(intr->src[0]);
if (array_deref->deref_type != nir_deref_type_array)
continue;
nir_alu_instr *alu = nir_src_as_alu_instr(intr->src[1]);
if (!alu || !nir_op_is_vec(alu->op))
return;
/* Check if any src is from @load_local_invocation_id. */
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
nir_instr *parent = alu->src[i].src.ssa->parent_instr;
if (parent->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *parent_intr = nir_instr_as_intrinsic(parent);
if (parent_intr->intrinsic !=
nir_intrinsic_load_local_invocation_id)
continue;
/* Found a match, change image access qualifier coherent. */
nir_deref_instr *parent_deref =
nir_src_as_deref(array_deref->parent);
parent_deref->var->data.access = ACCESS_COHERENT;
return;
}
} /* instr */
} /* block */
} /* func */
}
static void
anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
void *mem_ctx,
@ -956,6 +1006,22 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
nir_shader *nir = stage->nir;
unsigned workgroup_size = nir->info.workgroup_size[0] *
nir->info.workgroup_size[1] *
nir->info.workgroup_size[2];
/* We've noticed that a particular shader in "Last Of Us" accesses
* a 3D image using local workgroup index. Corruptions are observed
* unless the image is marked workgroup coherent.
* The shader workgroup size is 16x2x2 (64), which would fit inside
* the subgroup on other vendors (AMD). We think that is why the
* corruption is not observed there.
*/
if (pdevice->instance->large_workgroup_non_coherent_image_workaround &&
stage->stage == MESA_SHADER_COMPUTE &&
workgroup_size == 64)
fixup_large_workgroup_image_coherency(nir);
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS(_, nir, nir_lower_wpos_center);
NIR_PASS(_, nir, nir_lower_input_attachments,

View file

@ -1390,6 +1390,7 @@ struct anv_instance {
bool anv_upper_bound_descriptor_pool_sampler;
bool custom_border_colors_without_format;
bool vf_component_packing;
bool large_workgroup_non_coherent_image_workaround;
/* HW workarounds */
bool no_16bit;

View file

@ -958,8 +958,11 @@ TODO: document the other workarounds.
<application name="Drive Beyond Horizons" executable="DriveBeyondHorizons.exe">
<option name="force_vk_vendor" value="-1" />
</application>
<application name="The Last Of Us Part I" executable="tlou-i.exe">
<option name="anv_large_workgroup_non_coherent_image_workaround" value="true" />
</application>
<application name="The Last Of Us Part II Remastered" executable="tlou-ii.exe">
<option name="intel_storage_cache_policy_wt" value="true" />
<option name="anv_large_workgroup_non_coherent_image_workaround" value="true" />
</application>
<application name="RESIDENT EVIL 2" executable="re2.exe">
<option name="anv_assume_full_subgroups_with_shared_memory" value="true" />

View file

@ -874,6 +874,10 @@
DRI_CONF_OPT_B(anv_vf_component_packing, def, \
"Vertex fetching component packing")
#define DRI_CONF_ANV_LARGE_WORKGROUP_NON_COHERENT_IMAGE_WORKAROUND(def) \
DRI_CONF_OPT_B(anv_large_workgroup_non_coherent_image_workaround, def, \
"Fixup image coherency qualifier for certain shaders.")
/**
* \brief HASVK specific configuration options
*/