From e5a20d42acb926debd853aa4a07a431f2e570486 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Fri, 1 May 2026 13:30:20 -0700 Subject: [PATCH] turnip: Add an override to uncached memory type When the uncached memory type is used under emulation, most games suffer a significant performance penalty due to accessing the buffer atomically. When this option is set, uncached buffer allocations are overridden to be cached+coherent instead, if the host supports it. This allows the atomic accesses to still be done without the abysmal performance. --- src/freedreno/vulkan/tu_cmd_buffer.cc | 5 ++++- src/freedreno/vulkan/tu_device.cc | 9 +++++++++ src/freedreno/vulkan/tu_device.h | 9 +++++++++ src/freedreno/vulkan/tu_knl.cc | 20 ++++++++++++++++++++ src/util/driconf.h | 3 +++ 5 files changed, 45 insertions(+), 1 deletion(-) diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 2949c06399b..bef377b7900 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -43,7 +43,10 @@ tu_cmd_buffer_setup_status_tracking(struct tu_device *device) device, NULL, &status_bo, sizeof(enum tu_cmd_buffer_status), 0, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + (device->physical_device->preferred_uncached_as_cached_index >= 0 ? 
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT : 0) + , TU_BO_ALLOC_INTERNAL_RESOURCE, NULL, "cmd_buffer_status"); if (result != VK_SUCCESS) return NULL; diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 67468e5aa64..f324fd4a04f 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -1687,6 +1687,7 @@ tu_physical_device_init(struct tu_physical_device *device, device->level1_dcache_size = util_cache_granularity(); device->has_cached_non_coherent_memory = device->level1_dcache_size > 0 && !DETECT_ARCH_ARM; + device->preferred_uncached_as_cached_index = -1; device->memory.type_count = 1; device->memory.types[0] = @@ -1700,6 +1701,11 @@ tu_physical_device_init(struct tu_physical_device *device, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + if (instance->override_uncached_as_cache_coherent) { + /* Retain this memory type index to override later. */ + device->preferred_uncached_as_cached_index = device->memory.type_count; + } device->memory.type_count++; } @@ -1851,6 +1857,7 @@ static const driOptionDescription tu_dri_options[] = { DRI_CONF_TU_ENABLE_SOFTFLOAT32(false) DRI_CONF_TU_EMULATE_ALPHA_TO_COVERAGE(false) DRI_CONF_TU_AUTOTUNE_ALGORITHM() + DRI_CONF_TU_OVERRIDE_UNCACHED_AS_CACHE_COHERENT(false) DRI_CONF_SECTION_END }; @@ -1885,6 +1892,8 @@ tu_init_dri_options(struct tu_instance *instance) driQueryOptionb(&instance->dri_options, "tu_emulate_alpha_to_coverage"); instance->autotune_algo = driQueryOptionstr(&instance->dri_options, "tu_autotune_algorithm"); + instance->override_uncached_as_cache_coherent = + driQueryOptionb(&instance->dri_options, "tu_override_uncached_as_cache_coherent"); } static uint32_t instance_count = 0; diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h index c9f521fcc15..e5e58c99e2b 100644 --- a/src/freedreno/vulkan/tu_device.h +++ b/src/freedreno/vulkan/tu_device.h @@ -134,6 
+134,10 @@ struct tu_physical_device bool has_cached_coherent_memory; bool has_cached_non_coherent_memory; + /* Index of the device-local, host-coherent, host-cached memory type. + * Non-negative only when overriding device-local, uncached memory; -1 otherwise. + */ + int32_t preferred_uncached_as_cached_index; uintptr_t level1_dcache_size; struct fdl_ubwc_config ubwc_config; @@ -240,6 +244,11 @@ struct tu_instance /* Configuration option to use a specific autotune algorithm by default. */ const char *autotune_algo; + + /* When enabled, replaces uncached+host_visible allocations + * with cached+coherent+host_visible when the hardware supports it. + */ + bool override_uncached_as_cache_coherent; }; VK_DEFINE_HANDLE_CASTS(tu_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE) diff --git a/src/freedreno/vulkan/tu_knl.cc b/src/freedreno/vulkan/tu_knl.cc index 081393c974c..ab27798c0f6 100644 --- a/src/freedreno/vulkan/tu_knl.cc +++ b/src/freedreno/vulkan/tu_knl.cc @@ -43,6 +43,26 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev, size = align64(size, os_page_size); + const VkMemoryPropertyFlags replace_flags_mask = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + const VkMemoryPropertyFlags replace_flags_match = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + + if (dev->physical_device->preferred_uncached_as_cached_index >= 0 && + (mem_property & replace_flags_mask) == replace_flags_match) { + /* Override the memory type if the requested type was uncached, only replacing + * if the device supports cached-coherent memory type. 
+ */ + mem_property = + dev->physical_device->memory.types[dev->physical_device->preferred_uncached_as_cached_index]; + } + VkResult result = dev->instance->knl->bo_init(dev, base, out_bo, size, client_iova, mem_property, flags, lazy_vma, name); diff --git a/src/util/driconf.h b/src/util/driconf.h index fc669e82e91..a15068c6b87 100644 --- a/src/util/driconf.h +++ b/src/util/driconf.h @@ -668,6 +668,9 @@ DRI_CONF_OPT_S_NODEF(tu_autotune_algorithm, \ "Set the preferred autotune algorithm") +#define DRI_CONF_TU_OVERRIDE_UNCACHED_AS_CACHE_COHERENT(def) \ + DRI_CONF_OPT_B(tu_override_uncached_as_cache_coherent, def, \ + "Replaces uncached-host allocations with cached-coherent-host when possible. Only useful under x86 emulation where memory accesses tend to be atomic") /** * \brief Honeykrisp specific configuration options */