diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 9e00175d80f..69865019d4c 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -43,7 +43,10 @@ tu_cmd_buffer_setup_status_tracking(struct tu_device *device) device, NULL, &status_bo, sizeof(enum tu_cmd_buffer_status), 0, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + (device->physical_device->preferred_uncached_as_cached_index >= 0 ? + VK_MEMORY_PROPERTY_HOST_CACHED_BIT : 0) + , TU_BO_ALLOC_INTERNAL_RESOURCE, NULL, "cmd_buffer_status"); if (result != VK_SUCCESS) return NULL; diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 2e755551dca..d0707de407b 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -1686,6 +1686,7 @@ tu_physical_device_init(struct tu_physical_device *device, device->level1_dcache_size = util_cache_granularity(); device->has_cached_non_coherent_memory = device->level1_dcache_size > 0 && !DETECT_ARCH_ARM; + device->preferred_uncached_as_cached_index = -1; device->memory.type_count = 1; device->memory.types[0] = @@ -1699,6 +1700,11 @@ tu_physical_device_init(struct tu_physical_device *device, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + if (instance->override_uncached_as_cache_coherent) { + /* Retain this memory type index so uncached allocations can use it later. 
*/ + device->preferred_uncached_as_cached_index = device->memory.type_count; + } device->memory.type_count++; } @@ -1850,6 +1856,7 @@ static const driOptionDescription tu_dri_options[] = { DRI_CONF_TU_ENABLE_SOFTFLOAT32(false) DRI_CONF_TU_EMULATE_ALPHA_TO_COVERAGE(false) DRI_CONF_TU_AUTOTUNE_ALGORITHM() + DRI_CONF_TU_OVERRIDE_UNCACHED_AS_CACHE_COHERENT(false) DRI_CONF_SECTION_END }; @@ -1884,6 +1891,8 @@ tu_init_dri_options(struct tu_instance *instance) driQueryOptionb(&instance->dri_options, "tu_emulate_alpha_to_coverage"); instance->autotune_algo = driQueryOptionstr(&instance->dri_options, "tu_autotune_algorithm"); + instance->override_uncached_as_cache_coherent = + driQueryOptionb(&instance->dri_options, "tu_override_uncached_as_cache_coherent"); } static uint32_t instance_count = 0; diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h index c9f521fcc15..e5e58c99e2b 100644 --- a/src/freedreno/vulkan/tu_device.h +++ b/src/freedreno/vulkan/tu_device.h @@ -134,6 +134,10 @@ struct tu_physical_device bool has_cached_coherent_memory; bool has_cached_non_coherent_memory; + /* Index into memory.types of the device-local, host-coherent, host-cached type. + * Stays -1 unless uncached memory is being overridden; valid when >= 0. + */ + int32_t preferred_uncached_as_cached_index; uintptr_t level1_dcache_size; struct fdl_ubwc_config ubwc_config; @@ -240,6 +244,11 @@ struct tu_instance /* Configuration option to use a specific autotune algorithm by default. */ const char *autotune_algo; + + /* When enabled, replaces uncached+host_visible allocations + * with cached+coherent+host_visible when the hardware supports it. 
+ */ + bool override_uncached_as_cache_coherent; }; VK_DEFINE_HANDLE_CASTS(tu_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE) diff --git a/src/freedreno/vulkan/tu_knl.cc b/src/freedreno/vulkan/tu_knl.cc index 081393c974c..ab27798c0f6 100644 --- a/src/freedreno/vulkan/tu_knl.cc +++ b/src/freedreno/vulkan/tu_knl.cc @@ -43,6 +43,26 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev, size = align64(size, os_page_size); + const VkMemoryPropertyFlags replace_flags_mask = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + const VkMemoryPropertyFlags replace_flags_match = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + + if (dev->physical_device->preferred_uncached_as_cached_index >= 0 && + (mem_property & replace_flags_mask) == replace_flags_match) { + /* Uncached host-visible request and a cached-coherent type index was retained: + * substitute that memory type's property flags for the requested ones. + */ + mem_property = + dev->physical_device->memory.types[dev->physical_device->preferred_uncached_as_cached_index]; + } + VkResult result = dev->instance->knl->bo_init(dev, base, out_bo, size, client_iova, mem_property, flags, lazy_vma, name); diff --git a/src/util/driconf.h b/src/util/driconf.h index e618123d817..a353839c31c 100644 --- a/src/util/driconf.h +++ b/src/util/driconf.h @@ -672,6 +672,9 @@ DRI_CONF_OPT_S_NODEF(tu_autotune_algorithm, \ "Set the preferred autotune algorithm") +#define DRI_CONF_TU_OVERRIDE_UNCACHED_AS_CACHE_COHERENT(def) \ + DRI_CONF_OPT_B(tu_override_uncached_as_cache_coherent, def, \ + "Replaces uncached-host allocations with cached-coherent-host when possible. Only useful under x86 emulation where memory accesses tend to be atomic") /** * \brief Honeykrisp specific configuration options */