From 12db716fffc9b8382b81d9bb71649ef17bdc6dbe Mon Sep 17 00:00:00 2001 From: Aitor Camacho Date: Thu, 14 May 2026 00:23:39 +0900 Subject: [PATCH] kk: Fix subgroup failures on M1/2 due to bcsel f4812dc1 introduces optimizations that turn ior into bcsel. The MSL compiler will incorrectly compile the shader internally when bcsel is used leading to incorrect outputs. This commit adds a workaround that tricks the MSL compiler into correctly compiling the shader internally. Reviewed-by: squidbus Signed-off-by: Aitor Camacho Part-of: --- docs/drivers/kosmickrisp/workarounds.rst | 20 ++++++++++++++++++++ src/kosmickrisp/compiler/nir_to_msl.c | 18 +++++++++++------- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/docs/drivers/kosmickrisp/workarounds.rst b/docs/drivers/kosmickrisp/workarounds.rst index 2eec9797dcb..239120506b6 100644 --- a/docs/drivers/kosmickrisp/workarounds.rst +++ b/docs/drivers/kosmickrisp/workarounds.rst @@ -49,6 +49,26 @@ info on what was updated. Workarounds =========== +KK_WORKAROUND_10 +---------------- +| macOS version: 26.4.1 +| Metal ticket: Not reported +| Metal ticket status: +| CTS test failure: ``dEQP-VK.subgroups.arithmetic.compute.subgroupinclusive*_vec4`` +| Comments: + +See comment +https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41186#note_3470793 + +In short, certain ``ior`` operations can be and will be turned into ``bcsel`` +before reaching NIR to MSL. However, the MSL compiler seems to incorrectly +handle ``bcsel`` and the compiled shader misbehaves while the ``ior`` version +does not. This is worked around in compute shaders by AND-ing the ``bcsel`` +condition with a known true value (the address of the always-bound ``buf0``). 
+ +| Log: +| 2026-05-14: Workaround implemented + KK_WORKAROUND_9 --------------- | macOS version: 26.4.1 diff --git a/src/kosmickrisp/compiler/nir_to_msl.c b/src/kosmickrisp/compiler/nir_to_msl.c index b7fb0851848..eb10c028aab 100644 --- a/src/kosmickrisp/compiler/nir_to_msl.c +++ b/src/kosmickrisp/compiler/nir_to_msl.c @@ -76,8 +76,7 @@ emit_sysvals(struct nir_to_msl_ctx *ctx, nir_shader *shader) unsigned i; BITSET_FOREACH_SET(i, shader->info.system_values_read, SYSTEM_VALUE_MAX) { const char *sysval; - if (is_frag_with_post_depth_coverage && - i == SYSTEM_VALUE_SAMPLE_MASK_IN) + if (is_frag_with_post_depth_coverage && i == SYSTEM_VALUE_SAMPLE_MASK_IN) sysval = sysval_sample_mask_in_post_depth_coverage; else sysval = sysval_table[i]; @@ -504,6 +503,10 @@ alu_to_msl(struct nir_to_msl_ctx *ctx, nir_alu_instr *instr) P(ctx, " ? 1.0 : 0.0"); break; case nir_op_bcsel: + /* KK_WORKAROUND_10 All shaders will have buf0 bound */ + if (!(ctx->disabled_workarounds & BITFIELD_BIT(10)) && + ctx->shader->info.stage == MESA_SHADER_COMPUTE) + P(ctx, "(ulong)&buf0.contents[0] && "); alu_src_to_msl(ctx, instr, 0); P(ctx, " ? 
"); alu_src_to_msl(ctx, instr, 1); @@ -2099,16 +2102,17 @@ lower_ballot(nir_builder *b, nir_intrinsic_instr *intrin, void *_unused) return false; b->cursor = nir_before_instr(&intrin->instr); - nir_def* invocation = nir_load_subgroup_invocation(b); - nir_def* mask = nir_ishl(b, nir_b2i32(b, intrin->src[0].ssa), invocation); - nir_def* reduce = nir_reduce(b, mask, .reduction_op = nir_op_ior); + nir_def *invocation = nir_load_subgroup_invocation(b); + nir_def *mask = nir_ishl(b, nir_b2i32(b, intrin->src[0].ssa), invocation); + nir_def *reduce = nir_reduce(b, mask, .reduction_op = nir_op_ior); nir_def_rewrite_uses(&intrin->def, reduce); return true; } -void msl_preprocess_nir_workarounds(struct nir_shader *nir, - uint64_t disabled_workarounds) +void +msl_preprocess_nir_workarounds(struct nir_shader *nir, + uint64_t disabled_workarounds) { /* KK_WORKAROUND_3 */ if (!(disabled_workarounds & BITFIELD64_BIT(3))) {