kk: Fix subgroup failures on M1/2 due to bcsel

f4812dc1 introduces optimizations that turn ior into bcsel. The MSL compiler will incorrectly compile the shader internally when bcsel is used leading to incorrect outputs. This commit adds a workaround that tricks the MSL compiler into correctly compiling the shader internally. Reviewed-by: squidbus <squidbus@proton.me> Signed-off-by: Aitor Camacho <aitor@lunarg.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41548>
2026-05-15 05:38:11 +02:00 · 2026-05-14 00:23:39 +09:00 · 2026-05-14 00:23:39 +09:00 · 12db716fff
commit 12db716fff
parent 1323939f63
2 changed files with 31 additions and 7 deletions
--- a/docs/drivers/kosmickrisp/workarounds.rst
+++ b/docs/drivers/kosmickrisp/workarounds.rst
@ -49,6 +49,26 @@ info on what was updated.
 Workarounds
 ===========

+KK_WORKAROUND_10
+----------------
+| macOS version: 26.4.1
+| Metal ticket: Not reported
+| Metal ticket status:
+| CTS test failure: ``dEQP-VK.subgroups.arithmetic.compute.subgroupinclusive*_vec4``
+| Comments:
+
+See comment
+https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41186#note_3470793
+
+In short, certain ``ior`` operations can be and will be turned into ``bcsel``
+before reaching NIR to MSL. However, the MSL compiler seems to incorrectly
+handle ``bcsel`` and the compiled shader misbehaves while the ``ior`` version
+does not. This is worked around by adding a known true value to the conditional
+of the ``bcsel``.
+
+| Log:
+| 2026-05-14: Workaround implemented
+
 KK_WORKAROUND_9
 ---------------
 | macOS version: 26.4.1
--- a/src/kosmickrisp/compiler/nir_to_msl.c
+++ b/src/kosmickrisp/compiler/nir_to_msl.c
@ -76,8 +76,7 @@ emit_sysvals(struct nir_to_msl_ctx *ctx, nir_shader *shader)
   unsigned i;
   BITSET_FOREACH_SET(i, shader->info.system_values_read, SYSTEM_VALUE_MAX) {
      const char *sysval;
-      if (is_frag_with_post_depth_coverage &&
-          i == SYSTEM_VALUE_SAMPLE_MASK_IN)
+      if (is_frag_with_post_depth_coverage && i == SYSTEM_VALUE_SAMPLE_MASK_IN)
         sysval = sysval_sample_mask_in_post_depth_coverage;
      else
         sysval = sysval_table[i];
@ -504,6 +503,10 @@ alu_to_msl(struct nir_to_msl_ctx *ctx, nir_alu_instr *instr)
      P(ctx, " ? 1.0 : 0.0");
      break;
   case nir_op_bcsel:
+      /* KK_WORKAROUND_10 All shaders will have buf0 bound */
+      if (!(ctx->disabled_workarounds & BITFIELD_BIT(10)) &&
+          ctx->shader->info.stage == MESA_SHADER_COMPUTE)
+         P(ctx, "(ulong)&buf0.contents[0] && ");
      alu_src_to_msl(ctx, instr, 0);
      P(ctx, " ? ");
      alu_src_to_msl(ctx, instr, 1);
@ -2099,16 +2102,17 @@ lower_ballot(nir_builder *b, nir_intrinsic_instr *intrin, void *_unused)
      return false;

   b->cursor = nir_before_instr(&intrin->instr);
-   nir_def* invocation = nir_load_subgroup_invocation(b);
-   nir_def* mask = nir_ishl(b, nir_b2i32(b, intrin->src[0].ssa), invocation);
-   nir_def* reduce = nir_reduce(b, mask, .reduction_op = nir_op_ior);
+   nir_def *invocation = nir_load_subgroup_invocation(b);
+   nir_def *mask = nir_ishl(b, nir_b2i32(b, intrin->src[0].ssa), invocation);
+   nir_def *reduce = nir_reduce(b, mask, .reduction_op = nir_op_ior);
   nir_def_rewrite_uses(&intrin->def, reduce);

   return true;
 }

-void msl_preprocess_nir_workarounds(struct nir_shader *nir,
-                                    uint64_t disabled_workarounds)
+void
+msl_preprocess_nir_workarounds(struct nir_shader *nir,
+                               uint64_t disabled_workarounds)
 {
   /* KK_WORKAROUND_3 */
   if (!(disabled_workarounds & BITFIELD64_BIT(3))) {