diff --git a/docs/drivers/kosmickrisp/workarounds.rst b/docs/drivers/kosmickrisp/workarounds.rst index 239120506b6..585dd3ccc4a 100644 --- a/docs/drivers/kosmickrisp/workarounds.rst +++ b/docs/drivers/kosmickrisp/workarounds.rst @@ -49,6 +49,29 @@ info on what was updated. Workarounds =========== +KK_WORKAROUND_11 +---------------- +| macOS version: 26.5 +| Metal ticket: FB22683138 +| Metal ticket status: Waiting resolution +| CTS test failure: ``dEQP-VK.api.object_management.multithreaded_per_thread_device.merged_pipeline_cache`` +| Comments: + +If multiple MTL4Compiler instances are created and used concurrently, they may +corrupt the heap and crash the application. This has been verified +independently of KosmicKrisp and reported to Apple using a small demo +application which directly uses Metal. + +The CTS test in question here creates an instance, physical device, and device +for each of multiple threads, and intentionally creates several pipelines in +each thread at the same time. + +To work around this, maintain a table of devices to compilers, and use it to +ensure that each device only has one compiler instance to share. `MTLDevice` +instances are unique within the process, so no matter how many Vulkan devices +we create, the same GPU uses the same `MTLDevice`. Each `MTL4Compiler` instance +is still capable of performing concurrent compilation. + KK_WORKAROUND_10 ---------------- | macOS version: 26.4.1 diff --git a/src/kosmickrisp/vulkan/kk_device.c b/src/kosmickrisp/vulkan/kk_device.c index 19806fcfc4f..53e3cb79169 100644 --- a/src/kosmickrisp/vulkan/kk_device.c +++ b/src/kosmickrisp/vulkan/kk_device.c @@ -23,6 +23,83 @@ #include +struct kk_mtl_compiler { + mtl_compiler *handle; + uint32_t refcount; +}; + +static struct hash_table compilers_ht; +static simple_mtx_t compilers_ht_lock; +static once_flag compilers_ht_once = ONCE_FLAG_INIT; + +static void +kk_init_compiler_table() +{ + _mesa_pointer_hash_table_init(&compilers_ht, NULL); + simple_mtx_init(&compilers_ht_lock, mtx_plain); +} + +static mtl_compiler * +kk_acquire_compiler(struct kk_device *dev) +{ + /* KK_WORKAROUND_11 */ + if (dev->disabled_workarounds & BITFIELD64_BIT(11)) { + return mtl_new_compiler(dev->mtl_handle); + } + + call_once(&compilers_ht_once, kk_init_compiler_table); + simple_mtx_lock(&compilers_ht_lock); + + struct hash_entry *ent = + _mesa_hash_table_search(&compilers_ht, dev->mtl_handle); + + struct kk_mtl_compiler *compiler; + if (ent == NULL) { + compiler = ralloc(NULL, struct kk_mtl_compiler); + if (compiler == NULL) { + simple_mtx_unlock(&compilers_ht_lock); + return NULL; + } + + compiler->handle = mtl_new_compiler(dev->mtl_handle); + compiler->refcount = 1; + _mesa_hash_table_insert(&compilers_ht, dev->mtl_handle, compiler); + } else { + compiler = ent->data; + compiler->refcount++; + } + + simple_mtx_unlock(&compilers_ht_lock); + return compiler->handle; +} + +static void +kk_release_compiler(struct kk_device *dev) +{ + /* KK_WORKAROUND_11 */ + if (dev->disabled_workarounds & BITFIELD64_BIT(11)) { + mtl_release(dev->mtl_compiler_handle); + return; + } + + simple_mtx_lock(&compilers_ht_lock); + + struct hash_entry *ent = + _mesa_hash_table_search(&compilers_ht, dev->mtl_handle); + if (ent != NULL) { + struct kk_mtl_compiler *compiler = ent->data; + --compiler->refcount; + + if (compiler->refcount == 0) { + _mesa_hash_table_remove(&compilers_ht, ent); + mtl_release(compiler->handle); + ralloc_free(compiler); + } + } + + simple_mtx_unlock(&compilers_ht_lock); +} + DERIVE_HASH_TABLE(mtl_sampler_packed); static VkResult @@ -215,8 +292,10 @@ kk_CreateDevice(VkPhysicalDevice physicalDevice, dev->vk.command_dispatch_table = &dev->vk.dispatch_table; dev->vk.get_timestamp = kk_get_timestamp; + kk_parse_device_environment_options(dev); + /* Create a new Metal pipeline compiler for the device */ - dev->mtl_compiler_handle = mtl_new_compiler(dev->mtl_handle); + dev->mtl_compiler_handle = kk_acquire_compiler(dev); if (dev->mtl_compiler_handle == NULL) goto fail_init; @@ -251,8 +330,6 @@ kk_CreateDevice(VkPhysicalDevice physicalDevice, if (result != VK_SUCCESS) goto fail_sampler_heap; - kk_parse_device_environment_options(dev); - *pDevice = kk_device_to_handle(dev); return VK_SUCCESS; @@ -272,7 +349,7 @@ fail_vab_memory: mtl_release(dev->residency_set.handle); simple_mtx_destroy(&dev->residency_set.mutex); fail_compiler: - mtl_release(dev->mtl_compiler_handle); + kk_release_compiler(dev); fail_init: vk_device_finish(&dev->vk); fail_alloc: @@ -313,7 +390,7 @@ kk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) mtl_release(dev->residency_set.handle); simple_mtx_destroy(&dev->residency_set.mutex); - mtl_release(dev->mtl_compiler_handle); + kk_release_compiler(dev); vk_device_finish(&dev->vk);