anv: add SIMD32 requirement heuristic for Dragon's Dogma 2

A few compute shaders are doing BC3 image generation on the device and then generate incorrect data if running at SIMD16. That data is then sampled in a vertex shader that generates incorrect geometry. See https://github.com/ValveSoftware/Proton/issues/7595#issuecomment-4343662131 Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41501>
2026-05-18 05:08:06 +02:00 · 2026-05-12 12:21:42 +03:00 · 2026-05-12 12:21:42 +03:00 · ccef88173b
commit ccef88173b
parent dfa7e15f7c
1 changed files with 92 additions and 8 deletions
--- a/src/intel/vulkan/anv_shader_compile.c
+++ b/src/intel/vulkan/anv_shader_compile.c
@ -66,14 +66,8 @@ static bool is_alu1_iand_0x1f(nir_alu_instr *alu)
   return false;
 }

-static bool
-detect_simd32_shuffle(nir_builder *b,
-                      nir_intrinsic_instr *intrin,
-                      void *data)
+static bool is_simd32_shuffle(nir_intrinsic_instr *intrin)
 {
-   if (intrin->intrinsic != nir_intrinsic_shuffle)
-      return false;
-
   nir_alu_instr *alu1 = nir_src_as_alu(intrin->src[1]);
   if (alu1 == NULL)
      return false;
@ -89,6 +83,96 @@ detect_simd32_shuffle(nir_builder *b,
   return false;
 }

+/* Try to detect shaders testing with a sequence like this:
+ *
+ * 32x3    %49 = @load_local_invocation_id
+ * 32    %1673 = load_const (0xffffffe0 = -32 = 4294967264)
+ * 32    %1674 = iand %49.x, %1673 (0xffffffe0)
+ * 32    %1675 = @load_subgroup_size
+ * 32    %1676 = umod %1674, %1675
+ *
+ * This sequence appears to be targeted at subgroup sizes larger than 32. The
+ * problem in this sequence is that subgroup size is expected to be >= 32 to
+ * match the masking of local_invocation_id above. If it is smaller, the umod
+ * operation returns the same value as if the subgroup size was 32.
+ */
+static bool is_alu_used_for_umod_subgroup_size(nir_alu_instr *in_alu)
+{
+   nir_foreach_use(use, &in_alu->def) {
+      /* Only ALU users can be the modulo we are looking for. */
+      nir_instr *user = nir_src_use_instr(use);
+      if (user->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *mod = nir_instr_as_alu(user);
+      if (mod->op != nir_op_umod && mod->op != nir_op_imod)
+         continue;
+
+      /* A match needs the operand that is not this use of in_alu to come
+       * straight from load_subgroup_size. */
+      for (uint32_t i = 0; i < 2; i++) {
+         nir_src *other = &mod->src[i].src;
+         if (other == use || !nir_src_is_intrinsic(*other))
+            continue;
+
+         if (nir_src_as_intrinsic(*other)->intrinsic == nir_intrinsic_load_subgroup_size)
+            return true;
+      }
+   }
+
+   return false;
+}
+
+/* Looks for the SIMD32-assuming pattern rooted at a load_local_invocation_id:
+ * an iand of the loaded value with the constant 0xffffffe0 whose result is
+ * then reduced with umod/imod by load_subgroup_size.
+ */
+static bool
+is_local_invoc_id_used_with_simd32_assumption(nir_intrinsic_instr *local_inv_id)
+{
+   nir_foreach_use(src, &local_inv_id->def) {
+      nir_instr *instr = nir_src_use_instr(src);
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+      if (alu->op != nir_op_iand)
+         continue;
+
+      for (uint32_t i = 0; i < 2; i++) {
+         nir_alu_src *other = &alu->src[i];
+         if (&other->src == src || !nir_src_is_const(other->src))
+            continue;
+
+         /* Read the channel this ALU source selects rather than assuming a
+          * single-component constant.
+          */
+         if (nir_src_comp_as_uint(other->src, other->swizzle[0]) == 0xffffffe0 &&
+             is_alu_used_for_umod_subgroup_size(alu))
+            return true;
+      }
+   }
+
+   return false;
+}
+
+/* Callback handed to nir_shader_intrinsics_pass(): returns true for an
+ * intrinsic matching one of the SIMD32 patterns. b and data are unused.
+ */
+static bool
+detect_simd32_requirement(nir_builder *b,
+                          nir_intrinsic_instr *intrin,
+                          void *data)
+{
+   if (intrin->intrinsic == nir_intrinsic_shuffle)
+      return is_simd32_shuffle(intrin);
+
+   if (intrin->intrinsic == nir_intrinsic_load_local_invocation_id)
+      return is_local_invoc_id_used_with_simd32_assumption(intrin);
+
+   return false;
+}
+
 /* List of game-specific workarounds identified by BLAKE3 hash of the shader.
 * Add new workarounds here as needed.
 */
@ -828,7 +912,7 @@ anv_fixup_subgroup_size(struct anv_device *device, nir_shader *shader)
       info->min_subgroup_size != info->max_subgroup_size &&
       info->uses_wide_subgroup_intrinsics &&
       nir_shader_intrinsics_pass(shader,
-                                  detect_simd32_shuffle,
+                                  detect_simd32_requirement,
                                  nir_metadata_all,
                                  NULL)) {
      info->max_subgroup_size = BRW_SUBGROUP_SIZE;