mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-14 13:00:31 +01:00
panvk: Advertise a HOST_CACHED memory type if we have WC maps
If the GPU is IO coherent, we expose one memory type that's both host-coherent and host-cached. Otherwise we expose one type that's host-uncached and host-coherent, and one that's host-cached and host-noncoherent. By default, we advertise <cached,non-coherent> before <non-cached,coherent> because that's the combination providing the best performance in situations where the user knows how to deal with the non-coherent nature of the GPU. Unfortunately, the CTS has a few bugs (missing or incorrect flush/invalidate calls) forcing us to re-order things, which is done through the coherent_before_cached debug flag. We might drop the flag at some point (some fixes have been submitted, others are on their way). Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36385>
This commit is contained in:
parent
2afef24d3f
commit
1c7793ea0b
7 changed files with 119 additions and 10 deletions
|
|
@ -94,7 +94,7 @@ panfrost-g52-vk:arm64:
|
||||||
FDO_CI_CONCURRENT: 6 # We get OOMkills if we go too wide since VKCTS 1.4.4.0
|
FDO_CI_CONCURRENT: 6 # We get OOMkills if we go too wide since VKCTS 1.4.4.0
|
||||||
MESA_VK_IGNORE_CONFORMANCE_WARNING: 1
|
MESA_VK_IGNORE_CONFORMANCE_WARNING: 1
|
||||||
PAN_I_WANT_A_BROKEN_VULKAN_DRIVER: 1
|
PAN_I_WANT_A_BROKEN_VULKAN_DRIVER: 1
|
||||||
PANVK_DEBUG: "no_known_warn,sync"
|
PANVK_DEBUG: "no_known_warn,sync,coherent_before_cached"
|
||||||
DEQP_SUITE: panfrost-g52-vk
|
DEQP_SUITE: panfrost-g52-vk
|
||||||
HWCI_START_WESTON: 1
|
HWCI_START_WESTON: 1
|
||||||
|
|
||||||
|
|
@ -184,7 +184,7 @@ panfrost-g610-vk:arm64:
|
||||||
# Using more than 4 cores causes instabilities
|
# Using more than 4 cores causes instabilities
|
||||||
FDO_CI_CONCURRENT: 4
|
FDO_CI_CONCURRENT: 4
|
||||||
MESA_VK_IGNORE_CONFORMANCE_WARNING: 1
|
MESA_VK_IGNORE_CONFORMANCE_WARNING: 1
|
||||||
PANVK_DEBUG: "no_known_warn,sync,cs"
|
PANVK_DEBUG: "no_known_warn,sync,cs,coherent_before_cached"
|
||||||
DEQP_SUITE: panfrost-g610-vk
|
DEQP_SUITE: panfrost-g610-vk
|
||||||
DEQP_FRACTION: 5
|
DEQP_FRACTION: 5
|
||||||
HWCI_START_WESTON: 1
|
HWCI_START_WESTON: 1
|
||||||
|
|
@ -208,7 +208,7 @@ panfrost-g925-vk:arm64:
|
||||||
variables:
|
variables:
|
||||||
DRIVER_NAME: panvk
|
DRIVER_NAME: panvk
|
||||||
MESA_VK_IGNORE_CONFORMANCE_WARNING: 1
|
MESA_VK_IGNORE_CONFORMANCE_WARNING: 1
|
||||||
PANVK_DEBUG: "no_known_warn,sync,cs"
|
PANVK_DEBUG: "no_known_warn,sync,cs,coherent_before_cached"
|
||||||
DEQP_SUITE: panfrost-g925-vk
|
DEQP_SUITE: panfrost-g925-vk
|
||||||
HWCI_START_WESTON: 1
|
HWCI_START_WESTON: 1
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -90,12 +90,17 @@ panvk_AllocateMemory(VkDevice _device,
|
||||||
}
|
}
|
||||||
|
|
||||||
VK_FROM_HANDLE(panvk_device, device, _device);
|
VK_FROM_HANDLE(panvk_device, device, _device);
|
||||||
|
struct panvk_physical_device *physical_device =
|
||||||
|
to_panvk_physical_device(device->vk.physical);
|
||||||
struct panvk_device_memory *mem;
|
struct panvk_device_memory *mem;
|
||||||
bool can_be_exported = false;
|
bool can_be_exported = false;
|
||||||
VkResult result;
|
VkResult result;
|
||||||
|
|
||||||
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
|
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
|
||||||
|
|
||||||
|
const VkMemoryType *type =
|
||||||
|
&physical_device->memory.types[pAllocateInfo->memoryTypeIndex];
|
||||||
|
|
||||||
const VkExportMemoryAllocateInfo *export_info =
|
const VkExportMemoryAllocateInfo *export_info =
|
||||||
vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
|
vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
|
||||||
|
|
||||||
|
|
@ -130,9 +135,19 @@ panvk_AllocateMemory(VkDevice _device,
|
||||||
goto err_destroy_mem;
|
goto err_destroy_mem;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
uint32_t bo_flags = 0;
|
||||||
|
|
||||||
|
/* We don't do cached on exported buffers to keep the pre-WB_MMAP
|
||||||
|
* behavior.
|
||||||
|
*/
|
||||||
|
if (!can_be_exported &&
|
||||||
|
(type->propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT))
|
||||||
|
bo_flags |= PAN_KMOD_BO_FLAG_WB_MMAP;
|
||||||
|
|
||||||
|
bo_flags = panvk_device_adjust_bo_flags(device, bo_flags);
|
||||||
mem->bo = pan_kmod_bo_alloc(device->kmod.dev,
|
mem->bo = pan_kmod_bo_alloc(device->kmod.dev,
|
||||||
can_be_exported ? NULL : device->kmod.vm,
|
can_be_exported ? NULL : device->kmod.vm,
|
||||||
pAllocateInfo->allocationSize, 0);
|
pAllocateInfo->allocationSize, bo_flags);
|
||||||
if (!mem->bo) {
|
if (!mem->bo) {
|
||||||
result = panvk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
result = panvk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||||
goto err_destroy_mem;
|
goto err_destroy_mem;
|
||||||
|
|
@ -401,8 +416,38 @@ panvk_GetMemoryFdPropertiesKHR(VkDevice _device,
|
||||||
to_panvk_physical_device(device->vk.physical);
|
to_panvk_physical_device(device->vk.physical);
|
||||||
|
|
||||||
assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
|
assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
|
||||||
pMemoryFdProperties->memoryTypeBits =
|
|
||||||
BITFIELD_MASK(phys_dev->memory.type_count);
|
struct pan_kmod_bo *bo = pan_kmod_bo_import(device->kmod.dev, fd, 0);
|
||||||
|
if (!bo)
|
||||||
|
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
|
||||||
|
|
||||||
|
pMemoryFdProperties->memoryTypeBits = 0;
|
||||||
|
|
||||||
|
/* Keep things simple by only allowing host-visible if the BO doesn't require
|
||||||
|
* kernel-side synchronization going through the dma-buf exporter, which is
|
||||||
|
* reflected through the PAN_KMOD_BO_FLAG_FORCE_FULL_KERNEL_SYNC flag (NOTE(review): the check below tests PAN_KMOD_BO_FLAG_NO_MMAP; confirm which flag is meant).
|
||||||
|
*/
|
||||||
|
const bool can_do_host_visible = !(bo->flags & PAN_KMOD_BO_FLAG_NO_MMAP);
|
||||||
|
const bool can_do_host_coherent = !(bo->flags & PAN_KMOD_BO_FLAG_WB_MMAP) ||
|
||||||
|
(bo->flags & PAN_KMOD_BO_FLAG_IO_COHERENT);
|
||||||
|
const bool can_do_host_cached = (bo->flags & PAN_KMOD_BO_FLAG_WB_MMAP);
|
||||||
|
|
||||||
|
pMemoryFdProperties->memoryTypeBits = 0;
|
||||||
|
for (uint32_t i = 0; i < phys_dev->memory.type_count; i++) {
|
||||||
|
if (!can_do_host_visible && (phys_dev->memory.types[i].propertyFlags &
|
||||||
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
|
||||||
|
continue;
|
||||||
|
if (!can_do_host_coherent && (phys_dev->memory.types[i].propertyFlags &
|
||||||
|
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
|
||||||
|
continue;
|
||||||
|
if (!can_do_host_cached && (phys_dev->memory.types[i].propertyFlags &
|
||||||
|
VK_MEMORY_PROPERTY_HOST_CACHED_BIT))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
pMemoryFdProperties->memoryTypeBits |= BITFIELD_BIT(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
pan_kmod_bo_put(bo);
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,7 @@ static const struct debug_control panvk_debug_options[] = {
|
||||||
{"wsi_afbc", PANVK_DEBUG_WSI_AFBC},
|
{"wsi_afbc", PANVK_DEBUG_WSI_AFBC},
|
||||||
{"no_wb_mmap", PANVK_DEBUG_NO_WB_MMAP},
|
{"no_wb_mmap", PANVK_DEBUG_NO_WB_MMAP},
|
||||||
{"no_user_mmap_sync", PANVK_DEBUG_NO_USER_MMAP_SYNC},
|
{"no_user_mmap_sync", PANVK_DEBUG_NO_USER_MMAP_SYNC},
|
||||||
|
{"coherent_before_cached", PANVK_DEBUG_COHERENT_BEFORE_CACHED},
|
||||||
{NULL, 0},
|
{NULL, 0},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ enum panvk_debug_flags {
|
||||||
PANVK_DEBUG_WSI_AFBC = 1 << 13,
|
PANVK_DEBUG_WSI_AFBC = 1 << 13,
|
||||||
PANVK_DEBUG_NO_WB_MMAP = 1 << 14,
|
PANVK_DEBUG_NO_WB_MMAP = 1 << 14,
|
||||||
PANVK_DEBUG_NO_USER_MMAP_SYNC = 1 << 15,
|
PANVK_DEBUG_NO_USER_MMAP_SYNC = 1 << 15,
|
||||||
|
PANVK_DEBUG_COHERENT_BEFORE_CACHED = 1 << 16,
|
||||||
};
|
};
|
||||||
|
|
||||||
extern uint64_t panvk_debug;
|
extern uint64_t panvk_debug;
|
||||||
|
|
|
||||||
|
|
@ -242,20 +242,80 @@ static VkResult
|
||||||
get_device_heaps(struct panvk_physical_device *device,
|
get_device_heaps(struct panvk_physical_device *device,
|
||||||
const struct panvk_instance *instance)
|
const struct panvk_instance *instance)
|
||||||
{
|
{
|
||||||
|
int host_coherent_not_cached_idx = -1;
|
||||||
|
int host_cached_not_coherent_idx = -1;
|
||||||
|
|
||||||
device->memory.heap_count = 1;
|
device->memory.heap_count = 1;
|
||||||
device->memory.heaps[0] = (VkMemoryHeap) {
|
device->memory.heaps[0] = (VkMemoryHeap) {
|
||||||
.size = get_system_heap_size(),
|
.size = get_system_heap_size(),
|
||||||
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
|
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
|
||||||
};
|
};
|
||||||
|
|
||||||
device->memory.type_count = 1;
|
device->memory.type_count = 0;
|
||||||
device->memory.types[0] = (VkMemoryType) {
|
|
||||||
|
/* We don't have VRAM, but we expose a device-local only type so we can
|
||||||
|
* prevent imported dma-bufs that come from other drivers/subsystems from
|
||||||
|
* being CPU-mapped.
|
||||||
|
*/
|
||||||
|
device->memory.types[device->memory.type_count++] = (VkMemoryType) {
|
||||||
|
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||||
|
.heapIndex = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (device->kmod.dev->props.is_io_coherent) {
|
||||||
|
assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
|
||||||
|
/* If the device is coherent, we just have one memory type that's both
|
||||||
|
* host-cached and host-coherent. */
|
||||||
|
device->memory.types[device->memory.type_count++] = (VkMemoryType) {
|
||||||
|
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||||
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||||
|
VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
|
||||||
|
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||||
|
.heapIndex = 0,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!PANVK_DEBUG(NO_WB_MMAP) &&
|
||||||
|
(device->kmod.dev->props.supported_bo_flags & PAN_KMOD_BO_FLAG_WB_MMAP)) {
|
||||||
|
assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
|
||||||
|
host_cached_not_coherent_idx = device->memory.type_count;
|
||||||
|
device->memory.types[device->memory.type_count++] = (VkMemoryType) {
|
||||||
|
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||||
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||||
|
VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
|
||||||
|
.heapIndex = 0,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
|
||||||
|
host_coherent_not_cached_idx = device->memory.type_count;
|
||||||
|
device->memory.types[device->memory.type_count++] = (VkMemoryType) {
|
||||||
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||||
.heapIndex = 0,
|
.heapIndex = 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Ideally, we'd place HOST_CACHED first for perf reasons, but there are
|
||||||
|
* so many broken CTS tests (missing or invalid flush/invalidate
|
||||||
|
* calls), and so many added at each version that it gets impossible to
|
||||||
|
* catch up. So, keep things ordered in a way that the first HOST_VISIBLE
|
||||||
|
* type is also the one requiring no CPU cache maintenance if we're asked
|
||||||
|
* to.
|
||||||
|
*/
|
||||||
|
if (PANVK_DEBUG(COHERENT_BEFORE_CACHED) &&
|
||||||
|
host_cached_not_coherent_idx != -1 &&
|
||||||
|
host_coherent_not_cached_idx != -1 &&
|
||||||
|
host_coherent_not_cached_idx > host_cached_not_coherent_idx) {
|
||||||
|
VkMemoryType host_cached_not_coherent_type =
|
||||||
|
device->memory.types[host_cached_not_coherent_idx];
|
||||||
|
|
||||||
|
device->memory.types[host_cached_not_coherent_idx] =
|
||||||
|
device->memory.types[host_coherent_not_cached_idx];
|
||||||
|
device->memory.types[host_coherent_not_cached_idx] =
|
||||||
|
host_cached_not_coherent_type;
|
||||||
|
}
|
||||||
|
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -61,7 +61,7 @@ struct panvk_physical_device {
|
||||||
VkMemoryHeap heaps[1];
|
VkMemoryHeap heaps[1];
|
||||||
uint32_t heap_count;
|
uint32_t heap_count;
|
||||||
|
|
||||||
VkMemoryType types[1];
|
VkMemoryType types[4];
|
||||||
uint32_t type_count;
|
uint32_t type_count;
|
||||||
} memory;
|
} memory;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -808,7 +808,9 @@ panvk_per_arch(get_physical_device_properties)(
|
||||||
.standardSampleLocations = true,
|
.standardSampleLocations = true,
|
||||||
.optimalBufferCopyOffsetAlignment = 64,
|
.optimalBufferCopyOffsetAlignment = 64,
|
||||||
.optimalBufferCopyRowPitchAlignment = 64,
|
.optimalBufferCopyRowPitchAlignment = 64,
|
||||||
.nonCoherentAtomSize = 64,
|
|
||||||
|
/* If we can't detect the cacheline size, assume 64-byte cachelines. */
|
||||||
|
.nonCoherentAtomSize = util_has_cache_ops() ? util_cache_granularity() : 64,
|
||||||
|
|
||||||
/* Vulkan 1.0 sparse properties */
|
/* Vulkan 1.0 sparse properties */
|
||||||
.sparseResidencyNonResidentStrict = false,
|
.sparseResidencyNonResidentStrict = false,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue