mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-19 04:08:25 +02:00
kk: Fix issues with maximal reconvergence
If a loop has only one break case, Metal appears to move it to after the end of the loop, which goes against the expected behavior for reconvergence. Work around this by putting the break statement into a trivial, always-true runtime conditional when maximal reconvergence is requested. Fixes dEQP-VK.reconvergence.maximal.compute.nesting* Reviewed-by: Arcady Goldmints-Orlov <arcady@lunarg.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41229>
This commit is contained in:
parent
26ec32dada
commit
76a3951e03
3 changed files with 42 additions and 7 deletions
|
|
@ -49,6 +49,39 @@ info on what was updated.
|
|||
Workarounds
|
||||
===========
|
||||
|
||||
KK_WORKAROUND_9
|
||||
---------------
|
||||
| macOS version: 26.4.1
|
||||
| Metal ticket: Not reported
|
||||
| Metal ticket status:
|
||||
| CTS test failure: ``dEQP-VK.reconvergence.maximal.compute.nesting*``
|
||||
| Comments:
|
||||
|
||||
Metal seems to re-order the sole break case of a loop such that execution
|
||||
reconverges earlier than expected.
|
||||
|
||||
From the above-mentioned CTS test, consider the following, which is the
|
||||
only code path that breaks within a loop:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
if (subgroupElect()) {
|
||||
outputC.loc[gl_LocalInvocationIndex]++;
|
||||
outputB.b[(outLoc++)*invocationStride + gl_LocalInvocationIndex] =
|
||||
subgroupBallot(true);
|
||||
break;
|
||||
}
|
||||
|
||||
The test expects the ``subgroupBallot`` to yield just one bit set for the
|
||||
thread picked by ``subgroupElect``; however, Metal returns the full 0xFFFFFFFF,
|
||||
presumably because it moved those operations to after the loop.
|
||||
|
||||
To work around this, we add a trivial, always-true runtime condition to the
|
||||
break to ensure that the prior logic is not re-ordered.
|
||||
|
||||
| Log:
|
||||
| 2026-04-27: Workaround implemented
|
||||
|
||||
KK_WORKAROUND_7
|
||||
---------------
|
||||
| macOS version: 26.0.1
|
||||
|
|
|
|||
|
|
@ -1809,7 +1809,13 @@ jump_instr_to_msl(struct nir_to_msl_ctx *ctx, nir_jump_instr *jump)
|
|||
assert(!"Unimplemented");
|
||||
break;
|
||||
case nir_jump_break:
|
||||
P_IND(ctx, "break;\n");
|
||||
/* KK_WORKAROUND_9 */
|
||||
if (!ctx->shader->info.maximally_reconverges ||
|
||||
(ctx->disabled_workarounds & BITFIELD64_BIT(9))) {
|
||||
P_IND(ctx, "break;\n");
|
||||
} else {
|
||||
P_IND(ctx, "if ((ulong)simd_active_threads_mask()) break;\n");
|
||||
}
|
||||
break;
|
||||
case nir_jump_continue:
|
||||
P_IND(ctx, "continue;\n");
|
||||
|
|
|
|||
|
|
@ -123,9 +123,7 @@ kk_get_device_extensions(const struct kk_instance *instance,
|
|||
|
||||
/* Optional extensions */
|
||||
.KHR_calibrated_timestamps = true,
|
||||
/* Temporarily disabled due to failing tests in
|
||||
* dEQP-VK.reconvergence.maximal.compute.nesting* */
|
||||
.KHR_shader_maximal_reconvergence = false,
|
||||
.KHR_shader_maximal_reconvergence = true,
|
||||
.KHR_shader_relaxed_extended_instruction = true,
|
||||
.KHR_shader_subgroup_uniform_control_flow = true,
|
||||
#ifdef KK_USE_WSI_PLATFORM
|
||||
|
|
@ -287,9 +285,7 @@ kk_get_device_features(
|
|||
.shaderExpectAssume = true,
|
||||
|
||||
/* VK_KHR_shader_maximal_reconvergence */
|
||||
/* Temporarily disabled due to failing tests in
|
||||
* dEQP-VK.reconvergence.maximal.compute.nesting* */
|
||||
.shaderMaximalReconvergence = false,
|
||||
.shaderMaximalReconvergence = true,
|
||||
|
||||
/* VK_KHR_shader_relaxed_extended_instruction */
|
||||
.shaderRelaxedExtendedInstruction = true,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue