mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 07:20:10 +01:00
kk: Enable float16 and int8
Metal does not seem to respect memory coherency for threads. Workaround 6 enforces device coherency for global loads/stores even if it should not be needed. Reviewed-by: Arcady Goldmints-Orlov <arcady@lunarg.com> Signed-off-by: Aitor Camacho <aitor@lunarg.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38847>
This commit is contained in:
parent
1e0e7d4296
commit
16a94b1b7f
4 changed files with 45 additions and 24 deletions
|
|
@ -49,6 +49,37 @@ info on what was updated.
|
||||||
Workarounds
|
Workarounds
|
||||||
===========
|
===========
|
||||||
|
|
||||||
|
KK_WORKAROUND_6
|
||||||
|
---------------
|
||||||
|
| macOS version: 26.0.1
|
||||||
|
| Metal ticket: Not reported
|
||||||
|
| Metal ticket status:
|
||||||
|
| CTS test failure: ``dEQP-VK.spirv_assembly.instruction.*.float16.opcompositeinsert.*``
|
||||||
|
| Comments:
|
||||||
|
|
||||||
|
Metal does not respect its own Memory Coherency Model (MSL spec 4.8). From
|
||||||
|
the spec:
|
||||||
|
``By default, memory in the device address space has threadgroup coherence.``
|
||||||
|
|
||||||
|
If we have a single thread compute dispatch so that we do (simplified version):
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
for (...) {
|
||||||
|
value = ssbo_data[0]; // ssbo_data is a device buffer
|
||||||
|
...
|
||||||
|
ssbo_data[0] = new_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
``ssbo_data[0]`` will not correctly store/load the values so the value
|
||||||
|
written in iteration 0, will not be available in iteration 1. The workaround
|
||||||
|
to this issue is marking the device memory pointer through which the memory
|
||||||
|
is accessed as coherent so that the value is stored and loaded correctly.
|
||||||
|
Hopefully this does not affect performance much.
|
||||||
|
|
||||||
|
| Log:
|
||||||
|
| 2025-12-08: Workaround implemented and reported to Apple
|
||||||
|
|
||||||
KK_WORKAROUND_5
|
KK_WORKAROUND_5
|
||||||
---------------
|
---------------
|
||||||
| macOS version: 26.0.1
|
| macOS version: 26.0.1
|
||||||
|
|
|
||||||
|
|
@ -475,7 +475,7 @@ Vulkan 1.2 -- all DONE: anv, hk, nvk, panvk/v10+, pvr, tu, vn
|
||||||
VK_KHR_sampler_mirror_clamp_to_edge DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn)
|
VK_KHR_sampler_mirror_clamp_to_edge DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn)
|
||||||
VK_KHR_separate_depth_stencil_layouts DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn)
|
VK_KHR_separate_depth_stencil_layouts DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn)
|
||||||
VK_KHR_shader_atomic_int64 DONE (anv, lvp, nvk, panvk/v10+, radv, vn, tu/a740+)
|
VK_KHR_shader_atomic_int64 DONE (anv, lvp, nvk, panvk/v10+, radv, vn, tu/a740+)
|
||||||
VK_KHR_shader_float16_int8 DONE (anv, dzn, hasvk, lvp, nvk, panvk, radv, tu, vn)
|
VK_KHR_shader_float16_int8 DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, radv, tu, vn)
|
||||||
VK_KHR_shader_float_controls DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn)
|
VK_KHR_shader_float_controls DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn)
|
||||||
VK_KHR_shader_subgroup_extended_types DONE (anv, hasvk, kk, lvp, nvk, panvk/v10+, pvr, radv, tu, vn)
|
VK_KHR_shader_subgroup_extended_types DONE (anv, hasvk, kk, lvp, nvk, panvk/v10+, pvr, radv, tu, vn)
|
||||||
VK_KHR_spirv_1_4 DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk/v10+, pvr, radv, tu, v3dv, vn)
|
VK_KHR_spirv_1_4 DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk/v10+, pvr, radv, tu, v3dv, vn)
|
||||||
|
|
|
||||||
|
|
@ -1063,8 +1063,11 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
|
||||||
case nir_intrinsic_load_global: {
|
case nir_intrinsic_load_global: {
|
||||||
enum gl_access_qualifier access = nir_intrinsic_access(instr);
|
enum gl_access_qualifier access = nir_intrinsic_access(instr);
|
||||||
const char *type = msl_type_for_def(ctx->types, &instr->def);
|
const char *type = msl_type_for_def(ctx->types, &instr->def);
|
||||||
const char *addressing =
|
const bool apply_workaround =
|
||||||
access & ACCESS_COHERENT ? "coherent device" : "device";
|
!(ctx->disabled_workarounds & BITFIELD64_BIT(6));
|
||||||
|
const char *addressing = apply_workaround || (access & ACCESS_COHERENT)
|
||||||
|
? "coherent device"
|
||||||
|
: "device";
|
||||||
if (access & ACCESS_ATOMIC) {
|
if (access & ACCESS_ATOMIC) {
|
||||||
assert(instr->num_components == 1u &&
|
assert(instr->num_components == 1u &&
|
||||||
"We can only do single component with atomics");
|
"We can only do single component with atomics");
|
||||||
|
|
@ -1133,8 +1136,11 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
|
||||||
case nir_intrinsic_store_global: {
|
case nir_intrinsic_store_global: {
|
||||||
enum gl_access_qualifier access = nir_intrinsic_access(instr);
|
enum gl_access_qualifier access = nir_intrinsic_access(instr);
|
||||||
const char *type = msl_type_for_src(ctx->types, &instr->src[0]);
|
const char *type = msl_type_for_src(ctx->types, &instr->src[0]);
|
||||||
const char *addressing =
|
const bool apply_workaround =
|
||||||
access & ACCESS_COHERENT ? "coherent device" : "device";
|
!(ctx->disabled_workarounds & BITFIELD64_BIT(6));
|
||||||
|
const char *addressing = apply_workaround || (access & ACCESS_COHERENT)
|
||||||
|
? "coherent device"
|
||||||
|
: "device";
|
||||||
if (access & ACCESS_ATOMIC) {
|
if (access & ACCESS_ATOMIC) {
|
||||||
assert(instr->num_components == 1u &&
|
assert(instr->num_components == 1u &&
|
||||||
"We can only do single component with atomics");
|
"We can only do single component with atomics");
|
||||||
|
|
|
||||||
|
|
@ -74,8 +74,7 @@ kk_get_device_extensions(const struct kk_instance *instance,
|
||||||
.KHR_separate_depth_stencil_layouts = true,
|
.KHR_separate_depth_stencil_layouts = true,
|
||||||
.KHR_shader_atomic_int64 = false,
|
.KHR_shader_atomic_int64 = false,
|
||||||
.KHR_shader_float_controls = true,
|
.KHR_shader_float_controls = true,
|
||||||
.KHR_shader_float16_int8 =
|
.KHR_shader_float16_int8 = true,
|
||||||
false, /* TODO_KOSMICKRISP shaderInt8 shaderFloat16 */
|
|
||||||
.KHR_shader_subgroup_extended_types = true,
|
.KHR_shader_subgroup_extended_types = true,
|
||||||
.KHR_spirv_1_4 = true,
|
.KHR_spirv_1_4 = true,
|
||||||
.KHR_timeline_semaphore = true,
|
.KHR_timeline_semaphore = true,
|
||||||
|
|
@ -214,25 +213,10 @@ kk_get_device_features(
|
||||||
.samplerMirrorClampToEdge = true,
|
.samplerMirrorClampToEdge = true,
|
||||||
.scalarBlockLayout = true,
|
.scalarBlockLayout = true,
|
||||||
.separateDepthStencilLayouts = true,
|
.separateDepthStencilLayouts = true,
|
||||||
/* TODO_KOSMICKRISP shaderFloat16
|
.shaderFloat16 = true,
|
||||||
* Failing:
|
|
||||||
* dEQP-VK.spirv_assembly.instruction.*.float16.opcompositeinsert.*
|
|
||||||
* dEQP-VK.memory_model.shared.16bit.nested_structs_arrays.*
|
|
||||||
*/
|
|
||||||
.shaderFloat16 = false,
|
|
||||||
.shaderInputAttachmentArrayDynamicIndexing = true,
|
.shaderInputAttachmentArrayDynamicIndexing = true,
|
||||||
.shaderInputAttachmentArrayNonUniformIndexing = true,
|
.shaderInputAttachmentArrayNonUniformIndexing = true,
|
||||||
/* TODO_KOSMICKRISP shaderInt8
|
.shaderInt8 = true,
|
||||||
* Multiple MSL compiler crashes if we enable shaderInt8, need to
|
|
||||||
* understand why and a workaround:
|
|
||||||
* dEQP-VK.memory_model.shared.8bit.vector_types.9
|
|
||||||
* dEQP-VK.memory_model.shared.8bit.basic_types.8
|
|
||||||
* dEQP-VK.memory_model.shared.8bit.basic_arrays.2
|
|
||||||
* dEQP-VK.memory_model.shared.8bit.arrays_of_arrays.1
|
|
||||||
* dEQP-VK.memory_model.shared.8bit.arrays_of_arrays.8
|
|
||||||
* Probably more
|
|
||||||
*/
|
|
||||||
.shaderInt8 = false,
|
|
||||||
.shaderOutputLayer = true,
|
.shaderOutputLayer = true,
|
||||||
.shaderOutputViewportIndex = true,
|
.shaderOutputViewportIndex = true,
|
||||||
.shaderSampledImageArrayNonUniformIndexing = true,
|
.shaderSampledImageArrayNonUniformIndexing = true,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue