turnip: Add an override to uncached memory type

When the uncached memory type is used under emulation, most games suffer a
significant performance penalty because the buffer is accessed atomically.

When this option is set, uncached buffer allocations are overridden to be
cached+coherent instead, if the host supports it. This allows the atomic
accesses to still be performed, but without the abysmal
performance.
This commit is contained in:
Ryan Houdek 2026-05-01 13:30:20 -07:00
parent 80e6b468f4
commit e5a20d42ac
No known key found for this signature in database
5 changed files with 45 additions and 1 deletions

View file

@ -43,7 +43,10 @@ tu_cmd_buffer_setup_status_tracking(struct tu_device *device)
device, NULL, &status_bo, sizeof(enum tu_cmd_buffer_status), 0,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
(device->physical_device->preferred_uncached_as_cached_index >= 0 ?
VK_MEMORY_PROPERTY_HOST_CACHED_BIT : 0)
,
TU_BO_ALLOC_INTERNAL_RESOURCE, NULL, "cmd_buffer_status");
if (result != VK_SUCCESS)
return NULL;

View file

@ -1687,6 +1687,7 @@ tu_physical_device_init(struct tu_physical_device *device,
device->level1_dcache_size = util_cache_granularity();
device->has_cached_non_coherent_memory =
device->level1_dcache_size > 0 && !DETECT_ARCH_ARM;
device->preferred_uncached_as_cached_index = -1;
device->memory.type_count = 1;
device->memory.types[0] =
@ -1700,6 +1701,11 @@ tu_physical_device_init(struct tu_physical_device *device,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
if (instance->override_uncached_as_cache_coherent) {
/* Retain this memory type index to override later. */
device->preferred_uncached_as_cached_index = device->memory.type_count;
}
device->memory.type_count++;
}
@ -1851,6 +1857,7 @@ static const driOptionDescription tu_dri_options[] = {
DRI_CONF_TU_ENABLE_SOFTFLOAT32(false)
DRI_CONF_TU_EMULATE_ALPHA_TO_COVERAGE(false)
DRI_CONF_TU_AUTOTUNE_ALGORITHM()
DRI_CONF_TU_OVERRIDE_UNCACHED_AS_CACHE_COHERENT(false)
DRI_CONF_SECTION_END
};
@ -1885,6 +1892,8 @@ tu_init_dri_options(struct tu_instance *instance)
driQueryOptionb(&instance->dri_options, "tu_emulate_alpha_to_coverage");
instance->autotune_algo =
driQueryOptionstr(&instance->dri_options, "tu_autotune_algorithm");
instance->override_uncached_as_cache_coherent =
driQueryOptionb(&instance->dri_options, "tu_override_uncached_as_cache_coherent");
}
static uint32_t instance_count = 0;

View file

@ -134,6 +134,10 @@ struct tu_physical_device
bool has_cached_coherent_memory;
bool has_cached_non_coherent_memory;
/* Index for device local, host-coherent, host-cached memory.
* Only set to a valid index when overriding device-local, uncached memory;
* otherwise it stays at -1.
*/
int32_t preferred_uncached_as_cached_index;
uintptr_t level1_dcache_size;
struct fdl_ubwc_config ubwc_config;
@ -240,6 +244,11 @@ struct tu_instance
/* Configuration option to use a specific autotune algorithm by default. */
const char *autotune_algo;
/* When enabled, replaces uncached+host_visible allocations
* with cached+coherent+host_visible when the hardware supports it.
*/
bool override_uncached_as_cache_coherent;
};
VK_DEFINE_HANDLE_CASTS(tu_instance, vk.base, VkInstance,
VK_OBJECT_TYPE_INSTANCE)

View file

@ -43,6 +43,26 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev,
size = align64(size, os_page_size);
const VkMemoryPropertyFlags replace_flags_mask =
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
const VkMemoryPropertyFlags replace_flags_match =
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
if (dev->physical_device->preferred_uncached_as_cached_index >= 0 &&
(mem_property & replace_flags_mask) == replace_flags_match) {
/* Override the memory type if the requested type was uncached, only replacing
* if the device supports cached-coherent memory type.
*/
mem_property =
dev->physical_device->memory.types[dev->physical_device->preferred_uncached_as_cached_index];
}
VkResult result =
dev->instance->knl->bo_init(dev, base, out_bo, size, client_iova,
mem_property, flags, lazy_vma, name);

View file

@ -668,6 +668,9 @@
DRI_CONF_OPT_S_NODEF(tu_autotune_algorithm, \
"Set the preferred autotune algorithm")
#define DRI_CONF_TU_OVERRIDE_UNCACHED_AS_CACHE_COHERENT(def) \
DRI_CONF_OPT_B(tu_override_uncached_as_cache_coherent, def, \
"Replaces uncached-host allocations with cached-coherent-host when possible. Only useful under x86 emulation where memory accesses tend to be atomic")
/**
* \brief Honeykrisp specific configuration options
*/