From 12db716fffc9b8382b81d9bb71649ef17bdc6dbe Mon Sep 17 00:00:00 2001
From: Aitor Camacho <aitor@lunarg.com>
Date: Thu, 14 May 2026 00:23:39 +0900
Subject: [PATCH] kk: Fix subgroup failures on M1/2 due to bcsel

f4812dc1 introduced optimizations that turn ior into bcsel. The MSL
compiler miscompiles the shader internally when bcsel is used, leading
to incorrect outputs. This commit adds a workaround for compute shaders
that tricks the MSL compiler into compiling the shader correctly.

Reviewed-by: squidbus <squidbus@proton.me>
Signed-off-by: Aitor Camacho <aitor@lunarg.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41548>
---
 docs/drivers/kosmickrisp/workarounds.rst | 20 ++++++++++++++++++++
 src/kosmickrisp/compiler/nir_to_msl.c    | 18 +++++++++++-------
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/docs/drivers/kosmickrisp/workarounds.rst b/docs/drivers/kosmickrisp/workarounds.rst
index 2eec9797dcb..239120506b6 100644
--- a/docs/drivers/kosmickrisp/workarounds.rst
+++ b/docs/drivers/kosmickrisp/workarounds.rst
@@ -49,6 +49,26 @@ info on what was updated.
 Workarounds
 ===========
 
+KK_WORKAROUND_10
+----------------
+| macOS version: 26.4.1
+| Metal ticket: Not reported
+| Metal ticket status:
+| CTS test failure: ``dEQP-VK.subgroups.arithmetic.compute.subgroupinclusive*_vec4``
+| Comments:
+
+See comment
+https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41186#note_3470793
+
+In short, certain ``ior`` operations can be and will be turned into ``bcsel``
+before reaching NIR to MSL. However, the MSL compiler seems to handle ``bcsel``
+incorrectly: the compiled shader misbehaves while the ``ior`` version does not.
+In compute shaders, this is worked around by AND-ing a known-true value into
+the condition of the ``bcsel``.
+
+| Log:
+| 2026-05-14: Workaround implemented
+
 KK_WORKAROUND_9
 ---------------
 | macOS version: 26.4.1
diff --git a/src/kosmickrisp/compiler/nir_to_msl.c b/src/kosmickrisp/compiler/nir_to_msl.c
index b7fb0851848..eb10c028aab 100644
--- a/src/kosmickrisp/compiler/nir_to_msl.c
+++ b/src/kosmickrisp/compiler/nir_to_msl.c
@@ -76,8 +76,7 @@ emit_sysvals(struct nir_to_msl_ctx *ctx, nir_shader *shader)
    unsigned i;
    BITSET_FOREACH_SET(i, shader->info.system_values_read, SYSTEM_VALUE_MAX) {
       const char *sysval;
-      if (is_frag_with_post_depth_coverage &&
-          i == SYSTEM_VALUE_SAMPLE_MASK_IN)
+      if (is_frag_with_post_depth_coverage && i == SYSTEM_VALUE_SAMPLE_MASK_IN)
          sysval = sysval_sample_mask_in_post_depth_coverage;
       else
          sysval = sysval_table[i];
@@ -504,6 +503,10 @@ alu_to_msl(struct nir_to_msl_ctx *ctx, nir_alu_instr *instr)
       P(ctx, " ? 1.0 : 0.0");
       break;
    case nir_op_bcsel:
+      /* KK_WORKAROUND_10 All shaders will have buf0 bound */
+      if (!(ctx->disabled_workarounds & BITFIELD_BIT(10)) &&
+          ctx->shader->info.stage == MESA_SHADER_COMPUTE)
+         P(ctx, "(ulong)&buf0.contents[0] && ");
       alu_src_to_msl(ctx, instr, 0);
       P(ctx, " ? ");
       alu_src_to_msl(ctx, instr, 1);
@@ -2099,16 +2102,17 @@ lower_ballot(nir_builder *b, nir_intrinsic_instr *intrin, void *_unused)
       return false;
 
    b->cursor = nir_before_instr(&intrin->instr);
-   nir_def* invocation = nir_load_subgroup_invocation(b);
-   nir_def* mask = nir_ishl(b, nir_b2i32(b, intrin->src[0].ssa), invocation);
-   nir_def* reduce = nir_reduce(b, mask, .reduction_op = nir_op_ior);
+   nir_def *invocation = nir_load_subgroup_invocation(b);
+   nir_def *mask = nir_ishl(b, nir_b2i32(b, intrin->src[0].ssa), invocation);
+   nir_def *reduce = nir_reduce(b, mask, .reduction_op = nir_op_ior);
    nir_def_rewrite_uses(&intrin->def, reduce);
 
    return true;
 }
 
-void msl_preprocess_nir_workarounds(struct nir_shader *nir,
-                                    uint64_t disabled_workarounds)
+void
+msl_preprocess_nir_workarounds(struct nir_shader *nir,
+                               uint64_t disabled_workarounds)
 {
    /* KK_WORKAROUND_3 */
    if (!(disabled_workarounds & BITFIELD64_BIT(3))) {