From ccef88173b0e6fe200149b828d165d8811fe7808 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 12 May 2026 12:21:42 +0300 Subject: [PATCH] anv: add SIMD32 requirement heuristic for Dragon's Dogma 2 A few compute shaders are doing BC3 image generation on the device and then generate incorrect data if running at SIMD16. That data is then sampled in a vertex shader that generates incorrect geometry. See https://github.com/ValveSoftware/Proton/issues/7595#issuecomment-4343662131 Signed-off-by: Lionel Landwerlin Cc: mesa-stable Part-of: --- src/intel/vulkan/anv_shader_compile.c | 100 +++++++++++++++++++++++--- 1 file changed, 92 insertions(+), 8 deletions(-) diff --git a/src/intel/vulkan/anv_shader_compile.c b/src/intel/vulkan/anv_shader_compile.c index 0e85b2129c1..c2422110562 100644 --- a/src/intel/vulkan/anv_shader_compile.c +++ b/src/intel/vulkan/anv_shader_compile.c @@ -66,14 +66,8 @@ static bool is_alu1_iand_0x1f(nir_alu_instr *alu) return false; } -static bool -detect_simd32_shuffle(nir_builder *b, - nir_intrinsic_instr *intrin, - void *data) +static bool is_simd32_shuffle(nir_intrinsic_instr *intrin) { - if (intrin->intrinsic != nir_intrinsic_shuffle) - return false; - nir_alu_instr *alu1 = nir_src_as_alu(intrin->src[1]); if (alu1 == NULL) return false; @@ -89,6 +83,96 @@ detect_simd32_shuffle(nir_builder *b, return false; } +/* Try to detect shaders testing with a sequence like this: + * + * 32x3 %49 = @load_local_invocation_id + * 32 %1673 = load_const (0xffffffe0 = -32 = 4294967264) + * 32 %1674 = iand %49.x, %1673 (0xffffffe0) + * 32 %1675 = @load_subgroup_size + * 32 %1676 = umod %1674, %1675 + * + * This sequence appears to be targeted at subgroup sizes larger than 32. The + * problem in this sequence is that subgroup size is expected to be >= 32 to + * match the masking of local_invocation_id above. If smaller, the umod + * operation returns the same value as if the subgroup size was 32. 
+ */ +static bool is_alu_used_for_umod_subgroup_size(nir_alu_instr *in_alu) +{ + nir_foreach_use(src, &in_alu->def) { + nir_instr *instr = nir_src_use_instr(src); + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (alu->op != nir_op_umod && + alu->op != nir_op_imod) + continue; + + for (uint32_t i = 0; i < 2; i++) { + if (&alu->src[i].src == src) + continue; + + if (!nir_src_is_intrinsic(alu->src[i].src) || + nir_src_as_intrinsic(alu->src[i].src)->intrinsic != nir_intrinsic_load_subgroup_size) + continue; + + return true; + } + } + + return false; +} + +static bool +is_local_invoc_id_used_with_simd32_assumption(nir_intrinsic_instr *subgroup_inv) +{ + nir_foreach_use(src, &subgroup_inv->def) { + nir_instr *instr = nir_src_use_instr(src); + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (alu->op != nir_op_iand) + continue; + + /* nir_print_instr(&alu->instr, stderr); */ + /* fprintf(stderr, "\n"); */ + + for (uint32_t i = 0; i < 2; i++) { + if (&alu->src[i].src == src) + continue; + + if (!nir_src_is_const(alu->src[i].src)) + continue; + + if (nir_src_as_uint(alu->src[i].src) != 0xffffffe0) + continue; + + if (is_alu_used_for_umod_subgroup_size(alu)) + return true; + } + } + + return false; +} + +static bool +detect_simd32_requirement(nir_builder *b, + nir_intrinsic_instr *intrin, + void *data) +{ + switch (intrin->intrinsic) { + case nir_intrinsic_shuffle: + return is_simd32_shuffle(intrin); + + case nir_intrinsic_load_local_invocation_id: + return is_local_invoc_id_used_with_simd32_assumption(intrin); + + default: + return false; + } +} + /* List of game-specific workarounds identified by BLAKE3 hash of the shader. * Add new workarounds here as needed. 
*/ @@ -828,7 +912,7 @@ anv_fixup_subgroup_size(struct anv_device *device, nir_shader *shader) info->min_subgroup_size != info->max_subgroup_size && info->uses_wide_subgroup_intrinsics && nir_shader_intrinsics_pass(shader, - detect_simd32_shuffle, + detect_simd32_requirement, nir_metadata_all, NULL)) { info->max_subgroup_size = BRW_SUBGROUP_SIZE;