mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 07:28:11 +02:00
tu: Initial support for sparse binding
Plumb through support for a sparse queue and enable sparse binding using the kernel interfaces we added earlier. We also support sparse residency for buffers, which is straightforward, but sparse residency for images is much more complicated so it will be enabled later. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32533>
This commit is contained in:
parent
71ef46717c
commit
8feed47fce
8 changed files with 283 additions and 46 deletions
|
|
@ -29,6 +29,39 @@ tu_CreateBuffer(VkDevice _device,
|
|||
if (buffer == NULL)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
|
||||
struct tu_instance *instance = device->physical_device->instance;
|
||||
BITMASK_ENUM(tu_sparse_vma_flags) flags = 0;
|
||||
uint64_t client_address = 0;
|
||||
|
||||
if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT)
|
||||
flags |= TU_SPARSE_VMA_MAP_ZERO;
|
||||
if (pCreateInfo->flags & VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)
|
||||
flags |= TU_SPARSE_VMA_REPLAYABLE;
|
||||
|
||||
const VkBufferOpaqueCaptureAddressCreateInfo *replay_info =
|
||||
vk_find_struct_const(pCreateInfo->pNext,
|
||||
BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO);
|
||||
if (replay_info && replay_info->opaqueCaptureAddress) {
|
||||
client_address = replay_info->opaqueCaptureAddress;
|
||||
flags |= TU_SPARSE_VMA_REPLAYABLE;
|
||||
}
|
||||
|
||||
VkResult result =
|
||||
tu_sparse_vma_init(device, &buffer->vk.base, &buffer->vma,
|
||||
&buffer->vk.device_address, flags,
|
||||
pCreateInfo->size, client_address);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
|
||||
return result;
|
||||
}
|
||||
|
||||
vk_address_binding_report(&instance->vk, &buffer->vk.base,
|
||||
buffer->vk.device_address, buffer->vk.size,
|
||||
VK_DEVICE_ADDRESS_BINDING_TYPE_BIND_EXT);
|
||||
}
|
||||
|
||||
TU_RMV(buffer_create, device, buffer);
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
|
|
@ -58,10 +91,16 @@ tu_DestroyBuffer(VkDevice _device,
|
|||
tu_perfetto_log_destroy_buffer(device, buffer);
|
||||
#endif
|
||||
|
||||
if (buffer->vk.device_address)
|
||||
if (buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
|
||||
vk_address_binding_report(&instance->vk, &buffer->vk.base,
|
||||
buffer->vk.device_address, buffer->vk.size,
|
||||
VK_DEVICE_ADDRESS_BINDING_TYPE_UNBIND_EXT);
|
||||
tu_sparse_vma_finish(device, &buffer->vma);
|
||||
} else if (buffer->vk.device_address) {
|
||||
vk_address_binding_report(&instance->vk, &buffer->vk.base,
|
||||
buffer->vk.device_address, buffer->bo_size,
|
||||
VK_DEVICE_ADDRESS_BINDING_TYPE_UNBIND_EXT);
|
||||
}
|
||||
|
||||
|
||||
vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
|
||||
|
|
@ -76,9 +115,12 @@ tu_GetDeviceBufferMemoryRequirements(
|
|||
VK_FROM_HANDLE(tu_device, device, _device);
|
||||
|
||||
uint64_t size = pInfo->pCreateInfo->size;
|
||||
uint32_t alignment =
|
||||
(pInfo->pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) ?
|
||||
os_page_size : 64;
|
||||
pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
|
||||
.size = MAX2(align64(size, 64), size),
|
||||
.alignment = 64,
|
||||
.size = MAX2(align64(size, alignment), size),
|
||||
.alignment = alignment,
|
||||
.memoryTypeBits = (1 << device->physical_device->memory.type_count) - 1,
|
||||
};
|
||||
|
||||
|
|
@ -179,6 +221,13 @@ uint64_t tu_GetBufferOpaqueCaptureAddress(
|
|||
VkDevice _device,
|
||||
const VkBufferDeviceAddressInfo* pInfo)
|
||||
{
|
||||
/* We care only about memory allocation opaque addresses */
|
||||
VK_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);
|
||||
|
||||
/* Sparse buffers have their own iova allocation, but all others do not so
|
||||
* we only care about sparse buffers.
|
||||
*/
|
||||
if (buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
|
||||
return buffer->vk.device_address;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,14 +12,21 @@
|
|||
|
||||
#include "tu_common.h"
|
||||
|
||||
#include "tu_knl.h"
|
||||
|
||||
#include "vk_buffer.h"
|
||||
|
||||
struct tu_buffer
|
||||
{
|
||||
struct vk_buffer vk;
|
||||
|
||||
struct tu_bo *bo;
|
||||
uint64_t bo_size;
|
||||
union {
|
||||
struct {
|
||||
struct tu_bo *bo;
|
||||
uint64_t bo_size;
|
||||
};
|
||||
struct tu_sparse_vma vma;
|
||||
};
|
||||
};
|
||||
|
||||
VK_DEFINE_NONDISP_HANDLE_CASTS(tu_buffer, vk.base, VkBuffer,
|
||||
|
|
|
|||
|
|
@ -407,7 +407,8 @@ tu_get_features(struct tu_physical_device *pdevice,
|
|||
features->shaderFloat64 = false;
|
||||
features->shaderInt64 = true;
|
||||
features->shaderInt16 = true;
|
||||
features->sparseBinding = false;
|
||||
features->sparseBinding = pdevice->has_sparse;
|
||||
features->sparseResidencyBuffer = pdevice->has_sparse_prr;
|
||||
features->variableMultisampleRate = true;
|
||||
features->inheritedQueries = true;
|
||||
|
||||
|
|
@ -1022,7 +1023,7 @@ tu_get_properties(struct tu_physical_device *pdevice,
|
|||
props->maxMemoryAllocationCount = UINT32_MAX;
|
||||
props->maxSamplerAllocationCount = 64 * 1024;
|
||||
props->bufferImageGranularity = 64; /* A cache line */
|
||||
props->sparseAddressSpaceSize = 0;
|
||||
props->sparseAddressSpaceSize = pdevice->va_size;
|
||||
props->maxBoundDescriptorSets = pdevice->usable_sets;
|
||||
props->maxPerStageDescriptorSamplers = max_descriptor_set_size;
|
||||
props->maxPerStageDescriptorUniformBuffers = max_descriptor_set_size;
|
||||
|
|
@ -1158,7 +1159,7 @@ tu_get_properties(struct tu_physical_device *pdevice,
|
|||
props->sparseResidencyStandard2DMultisampleBlockShape = { 0 };
|
||||
props->sparseResidencyStandard3DBlockShape = { 0 };
|
||||
props->sparseResidencyAlignedMipSize = { 0 };
|
||||
props->sparseResidencyNonResidentStrict = { 0 };
|
||||
props->sparseResidencyNonResidentStrict = true;
|
||||
|
||||
strcpy(props->deviceName, pdevice->name);
|
||||
memcpy(props->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
|
||||
|
|
@ -1300,7 +1301,7 @@ tu_get_properties(struct tu_physical_device *pdevice,
|
|||
props->maxEmbeddedImmutableSamplerBindings = pdevice->usable_sets;
|
||||
props->maxEmbeddedImmutableSamplers = max_descriptor_set_size;
|
||||
props->bufferCaptureReplayDescriptorDataSize = 0;
|
||||
props->imageCaptureReplayDescriptorDataSize = 0;
|
||||
props->imageCaptureReplayDescriptorDataSize = sizeof(uint64_t);
|
||||
props->imageViewCaptureReplayDescriptorDataSize = 0;
|
||||
props->samplerCaptureReplayDescriptorDataSize = 0;
|
||||
props->accelerationStructureCaptureReplayDescriptorDataSize = 0;
|
||||
|
|
@ -1465,6 +1466,25 @@ static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
|
|||
NULL,
|
||||
};
|
||||
|
||||
/* Note if we introduce more queues in a family that we may need to reduce the max
|
||||
* scope in our nir_opt_acquire_release_barriers() call. See
|
||||
* https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33504#note_2807879
|
||||
*/
|
||||
static const VkQueueFamilyProperties tu_gfx_queue_family_properties = {
|
||||
.queueFlags =
|
||||
VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
|
||||
.queueCount = 1,
|
||||
.timestampValidBits = 48,
|
||||
.minImageTransferGranularity = { 1, 1, 1 },
|
||||
};
|
||||
|
||||
static const VkQueueFamilyProperties tu_sparse_queue_family_properties = {
|
||||
.queueFlags = VK_QUEUE_SPARSE_BINDING_BIT,
|
||||
.queueCount = 1,
|
||||
.timestampValidBits = 48,
|
||||
.minImageTransferGranularity = { 1, 1, 1 },
|
||||
};
|
||||
|
||||
VkResult
|
||||
tu_physical_device_init(struct tu_physical_device *device,
|
||||
struct tu_instance *instance)
|
||||
|
|
@ -1625,6 +1645,20 @@ tu_physical_device_init(struct tu_physical_device *device,
|
|||
|
||||
device->vk.supported_sync_types = device->sync_types;
|
||||
|
||||
device->queue_families[device->num_queue_families++] =
|
||||
(struct tu_queue_family) {
|
||||
.type = TU_QUEUE_GFX,
|
||||
.properties = &tu_gfx_queue_family_properties,
|
||||
};
|
||||
|
||||
if (device->has_sparse) {
|
||||
device->queue_families[device->num_queue_families++] =
|
||||
(struct tu_queue_family) {
|
||||
.type = TU_QUEUE_SPARSE,
|
||||
.properties = &tu_sparse_queue_family_properties,
|
||||
};
|
||||
}
|
||||
|
||||
#ifdef TU_USE_WSI_PLATFORM
|
||||
result = tu_wsi_init(device);
|
||||
if (result != VK_SUCCESS) {
|
||||
|
|
@ -1807,23 +1841,19 @@ tu_DestroyInstance(VkInstance _instance,
|
|||
vk_free(&instance->vk.alloc, instance);
|
||||
}
|
||||
|
||||
/* Note if we introduce more queues in a family that we may need to reduce the max
|
||||
* scope in our nir_opt_acquire_release_barriers() call. See
|
||||
* https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33504#note_2807879
|
||||
*/
|
||||
static const VkQueueFamilyProperties tu_queue_family_properties = {
|
||||
.queueFlags =
|
||||
VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
|
||||
.queueCount = 1,
|
||||
.timestampValidBits = 48,
|
||||
.minImageTransferGranularity = { 1, 1, 1 },
|
||||
};
|
||||
|
||||
void
|
||||
tu_physical_device_get_global_priority_properties(const struct tu_physical_device *pdevice,
|
||||
enum tu_queue_type type,
|
||||
VkQueueFamilyGlobalPriorityPropertiesKHR *props)
|
||||
{
|
||||
props->priorityCount = MIN2(pdevice->submitqueue_priority_count, 4);
|
||||
/* drm/msm only supports one priority for VM_BIND queues */
|
||||
if (type == TU_QUEUE_SPARSE) {
|
||||
props->priorityCount = 1;
|
||||
props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
|
||||
return;
|
||||
}
|
||||
|
||||
props->priorityCount = MIN2(pdevice->submitqueue_priority_count, 3);
|
||||
switch (props->priorityCount) {
|
||||
case 1:
|
||||
props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
|
||||
|
|
@ -1860,20 +1890,24 @@ tu_GetPhysicalDeviceQueueFamilyProperties2(
|
|||
VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
|
||||
pQueueFamilyProperties, pQueueFamilyPropertyCount);
|
||||
|
||||
vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
|
||||
{
|
||||
p->queueFamilyProperties = tu_queue_family_properties;
|
||||
for (unsigned i = 0; i < pdevice->num_queue_families; i++) {
|
||||
struct tu_queue_family *family = &pdevice->queue_families[i];
|
||||
|
||||
vk_foreach_struct(ext, p->pNext) {
|
||||
switch (ext->sType) {
|
||||
case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
|
||||
VkQueueFamilyGlobalPriorityPropertiesKHR *props =
|
||||
(VkQueueFamilyGlobalPriorityPropertiesKHR *) ext;
|
||||
tu_physical_device_get_global_priority_properties(pdevice, props);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
|
||||
p->queueFamilyProperties = *family->properties;
|
||||
|
||||
vk_foreach_struct(ext, p->pNext) {
|
||||
switch (ext->sType) {
|
||||
case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
|
||||
VkQueueFamilyGlobalPriorityPropertiesKHR *props =
|
||||
(VkQueueFamilyGlobalPriorityPropertiesKHR *) ext;
|
||||
tu_physical_device_get_global_priority_properties(
|
||||
pdevice, family->type, props);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2640,6 +2674,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
const VkDeviceQueueCreateInfo *queue_create =
|
||||
&pCreateInfo->pQueueCreateInfos[i];
|
||||
uint32_t qfi = queue_create->queueFamilyIndex;
|
||||
enum tu_queue_type type = physical_device->queue_families[qfi].type;
|
||||
device->queues[qfi] = (struct tu_queue *) vk_alloc(
|
||||
&device->vk.alloc,
|
||||
queue_create->queueCount * sizeof(struct tu_queue), 8,
|
||||
|
|
@ -2657,7 +2692,8 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
device->queue_count[qfi] = queue_create->queueCount;
|
||||
|
||||
for (unsigned q = 0; q < queue_create->queueCount; q++) {
|
||||
result = tu_queue_init(device, &device->queues[qfi][q], q, queue_create);
|
||||
result = tu_queue_init(device, &device->queues[qfi][q], type, q,
|
||||
queue_create);
|
||||
if (result != VK_SUCCESS) {
|
||||
device->queue_count[qfi] = q;
|
||||
goto fail_queues;
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@
|
|||
#include "tu_cs.h"
|
||||
#include "tu_pass.h"
|
||||
#include "tu_perfetto.h"
|
||||
#include "tu_queue.h"
|
||||
#include "tu_suballoc.h"
|
||||
#include "tu_util.h"
|
||||
|
||||
|
|
@ -31,7 +32,7 @@
|
|||
/* queue types */
|
||||
#define TU_QUEUE_GENERAL 0
|
||||
|
||||
#define TU_MAX_QUEUE_FAMILIES 1
|
||||
#define TU_MAX_QUEUE_FAMILIES 2
|
||||
|
||||
#define TU_BORDER_COLOR_COUNT 4096
|
||||
|
||||
|
|
@ -72,6 +73,11 @@ enum tu_kgsl_dma_type
|
|||
TU_KGSL_DMA_TYPE_DMAHEAP,
|
||||
};
|
||||
|
||||
struct tu_queue_family {
|
||||
enum tu_queue_type type;
|
||||
const VkQueueFamilyProperties *properties;
|
||||
};
|
||||
|
||||
extern uint64_t os_page_size;
|
||||
|
||||
struct tu_physical_device
|
||||
|
|
@ -142,6 +148,9 @@ struct tu_physical_device
|
|||
VkMemoryPropertyFlags types[VK_MAX_MEMORY_TYPES];
|
||||
} memory;
|
||||
|
||||
struct tu_queue_family queue_families[TU_MAX_QUEUE_FAMILIES];
|
||||
unsigned num_queue_families;
|
||||
|
||||
struct fd_dev_id dev_id;
|
||||
struct fd_dev_info dev_info;
|
||||
const struct fd_dev_info *info;
|
||||
|
|
@ -534,6 +543,7 @@ tu_physical_device_init(struct tu_physical_device *device,
|
|||
|
||||
void
|
||||
tu_physical_device_get_global_priority_properties(const struct tu_physical_device *pdevice,
|
||||
enum tu_queue_type type,
|
||||
VkQueueFamilyGlobalPriorityPropertiesKHR *props);
|
||||
|
||||
uint64_t
|
||||
|
|
|
|||
|
|
@ -878,6 +878,37 @@ tu_CreateImage(VkDevice _device,
|
|||
goto fail;
|
||||
}
|
||||
|
||||
if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
|
||||
struct tu_instance *instance = device->physical_device->instance;
|
||||
BITMASK_ENUM(tu_sparse_vma_flags) flags = 0;
|
||||
|
||||
uint64_t client_address = 0;
|
||||
if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT)
|
||||
flags |= TU_SPARSE_VMA_MAP_ZERO;
|
||||
if (pCreateInfo->flags & VK_IMAGE_CREATE_DESCRIPTOR_BUFFER_CAPTURE_REPLAY_BIT_EXT)
|
||||
flags |= TU_SPARSE_VMA_REPLAYABLE;
|
||||
|
||||
const VkOpaqueCaptureDescriptorDataCreateInfoEXT *replay_info =
|
||||
vk_find_struct_const(pCreateInfo->pNext,
|
||||
OPAQUE_CAPTURE_DESCRIPTOR_DATA_CREATE_INFO_EXT);
|
||||
if (replay_info && replay_info->opaqueCaptureDescriptorData) {
|
||||
flags |= TU_SPARSE_VMA_REPLAYABLE;
|
||||
client_address =
|
||||
*(const uint64_t *)replay_info->opaqueCaptureDescriptorData;
|
||||
}
|
||||
|
||||
result = tu_sparse_vma_init(device, &image->vk.base, &image->vma,
|
||||
&image->iova, flags, image->total_size,
|
||||
client_address);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
vk_address_binding_report(&instance->vk, &image->vk.base,
|
||||
image->iova, image->total_size,
|
||||
VK_DEVICE_ADDRESS_BINDING_TYPE_BIND_EXT);
|
||||
}
|
||||
|
||||
TU_RMV(image_create, device, image);
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
|
|
@ -910,6 +941,10 @@ tu_DestroyImage(VkDevice _device,
|
|||
tu_perfetto_log_destroy_image(device, image);
|
||||
#endif
|
||||
|
||||
if (image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
|
||||
tu_sparse_vma_finish(device, &image->vma);
|
||||
}
|
||||
|
||||
if (image->iova)
|
||||
vk_address_binding_report(&instance->vk, &image->vk.base,
|
||||
image->iova, image->total_size,
|
||||
|
|
@ -1009,9 +1044,13 @@ static void
|
|||
tu_get_image_memory_requirements(struct tu_device *dev, struct tu_image *image,
|
||||
VkMemoryRequirements2 *pMemoryRequirements)
|
||||
{
|
||||
uint32_t alignment = image->layout[0].base_align;
|
||||
if (image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
|
||||
alignment = MAX2(alignment, os_page_size);
|
||||
|
||||
pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
|
||||
.size = image->total_size,
|
||||
.alignment = image->layout[0].base_align,
|
||||
.alignment = alignment,
|
||||
.memoryTypeBits = (1 << dev->physical_device->memory.type_count) - 1,
|
||||
};
|
||||
|
||||
|
|
@ -1209,3 +1248,17 @@ tu_fragment_density_map_sample(const struct tu_image_view *fdm,
|
|||
area->width = 1.0f / density[0];
|
||||
area->height = 1.0f / density[1];
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
tu_GetImageOpaqueCaptureDescriptorDataEXT(VkDevice device,
|
||||
const VkImageCaptureDescriptorDataInfoEXT *pInfo,
|
||||
void *pData)
|
||||
{
|
||||
VK_FROM_HANDLE(tu_image, image, pInfo->image);
|
||||
|
||||
/* Save the image iova so that when replaying sparse images have a
|
||||
* consistent iova and therefore consistent descriptor contents.
|
||||
*/
|
||||
*(uint64_t *)pData = image->iova;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,8 @@
|
|||
#include "tu_common.h"
|
||||
#include "fdl/freedreno_lrz_layout.h"
|
||||
|
||||
#include "tu_knl.h"
|
||||
|
||||
#define TU_MAX_PLANE_COUNT 3
|
||||
|
||||
#define tu_fdl_view_stencil(view, x) \
|
||||
|
|
@ -35,9 +37,14 @@ struct tu_image
|
|||
uint64_t total_size;
|
||||
|
||||
/* Set when bound */
|
||||
struct tu_bo *bo;
|
||||
uint64_t bo_offset;
|
||||
uint64_t iova;
|
||||
union {
|
||||
struct {
|
||||
struct tu_bo *bo;
|
||||
uint64_t bo_offset;
|
||||
};
|
||||
struct tu_sparse_vma vma;
|
||||
};
|
||||
|
||||
/* For fragment density map */
|
||||
void *map;
|
||||
|
|
|
|||
|
|
@ -9,8 +9,10 @@
|
|||
|
||||
#include "tu_queue.h"
|
||||
|
||||
#include "tu_buffer.h"
|
||||
#include "tu_cmd_buffer.h"
|
||||
#include "tu_dynamic_rendering.h"
|
||||
#include "tu_image.h"
|
||||
#include "tu_knl.h"
|
||||
#include "tu_device.h"
|
||||
|
||||
|
|
@ -19,11 +21,12 @@
|
|||
static int
|
||||
tu_get_submitqueue_priority(const struct tu_physical_device *pdevice,
|
||||
VkQueueGlobalPriorityKHR global_priority,
|
||||
enum tu_queue_type type,
|
||||
bool global_priority_query)
|
||||
{
|
||||
if (global_priority_query) {
|
||||
VkQueueFamilyGlobalPriorityPropertiesKHR props;
|
||||
tu_physical_device_get_global_priority_properties(pdevice, &props);
|
||||
tu_physical_device_get_global_priority_properties(pdevice, type, &props);
|
||||
|
||||
bool valid = false;
|
||||
for (uint32_t i = 0; i < props.priorityCount; i++) {
|
||||
|
|
@ -37,6 +40,10 @@ tu_get_submitqueue_priority(const struct tu_physical_device *pdevice,
|
|||
return -1;
|
||||
}
|
||||
|
||||
/* drm/msm requires a priority of 0 */
|
||||
if (type == TU_QUEUE_SPARSE)
|
||||
return 0;
|
||||
|
||||
/* Valid values are from 0 to (pdevice->submitqueue_priority_count - 1),
|
||||
* with 0 being the highest priority.
|
||||
*
|
||||
|
|
@ -253,9 +260,75 @@ fail_create_submit:
|
|||
return result;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
queue_submit_sparse(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
|
||||
{
|
||||
struct tu_queue *queue = list_entry(_queue, struct tu_queue, vk);
|
||||
struct tu_device *device = queue->device;
|
||||
|
||||
pthread_mutex_lock(&device->submit_mutex);
|
||||
|
||||
void *submit = tu_submit_create(device);
|
||||
if (!submit)
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
for (uint32_t i = 0; i < vk_submit->buffer_bind_count; i++) {
|
||||
const VkSparseBufferMemoryBindInfo *bind = &vk_submit->buffer_binds[i];
|
||||
VK_FROM_HANDLE(tu_buffer, buffer, bind->buffer);
|
||||
|
||||
for (uint32_t j = 0; j < bind->bindCount; j++) {
|
||||
const VkSparseMemoryBind *range = &bind->pBinds[j];
|
||||
VK_FROM_HANDLE(tu_device_memory, mem, range->memory);
|
||||
|
||||
tu_submit_add_bind(queue->device, submit,
|
||||
&buffer->vma, range->resourceOffset,
|
||||
mem ? mem->bo : NULL,
|
||||
mem ? range->memoryOffset : 0,
|
||||
range->size);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < vk_submit->image_opaque_bind_count; i++) {
|
||||
const VkSparseImageOpaqueMemoryBindInfo *bind =
|
||||
&vk_submit->image_opaque_binds[i];
|
||||
VK_FROM_HANDLE(tu_image, image, bind->image);
|
||||
|
||||
for (uint32_t j = 0; j < bind->bindCount; j++) {
|
||||
const VkSparseMemoryBind *range = &bind->pBinds[j];
|
||||
VK_FROM_HANDLE(tu_device_memory, mem, range->memory);
|
||||
|
||||
tu_submit_add_bind(queue->device, submit,
|
||||
&image->vma, range->resourceOffset,
|
||||
mem ? mem->bo : NULL,
|
||||
mem ? range->memoryOffset : 0,
|
||||
range->size);
|
||||
}
|
||||
}
|
||||
|
||||
VkResult result =
|
||||
tu_queue_submit(queue, submit, vk_submit->waits, vk_submit->wait_count,
|
||||
vk_submit->signals, vk_submit->signal_count,
|
||||
NULL);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
pthread_mutex_unlock(&device->submit_mutex);
|
||||
goto out;
|
||||
}
|
||||
|
||||
device->submit_count++;
|
||||
|
||||
pthread_mutex_unlock(&device->submit_mutex);
|
||||
pthread_cond_broadcast(&queue->device->timeline_cond);
|
||||
|
||||
out:
|
||||
tu_submit_finish(device, submit);
|
||||
|
||||
return result;
|
||||
}
|
||||
VkResult
|
||||
tu_queue_init(struct tu_device *device,
|
||||
struct tu_queue *queue,
|
||||
enum tu_queue_type type,
|
||||
int idx,
|
||||
const VkDeviceQueueCreateInfo *create_info)
|
||||
{
|
||||
|
|
@ -268,7 +341,7 @@ tu_queue_init(struct tu_device *device,
|
|||
VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR);
|
||||
|
||||
const int priority = tu_get_submitqueue_priority(
|
||||
device->physical_device, global_priority,
|
||||
device->physical_device, global_priority, type,
|
||||
device->vk.enabled_features.globalPriorityQuery);
|
||||
if (priority < 0) {
|
||||
return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
|
||||
|
|
@ -281,10 +354,11 @@ tu_queue_init(struct tu_device *device,
|
|||
|
||||
queue->device = device;
|
||||
queue->priority = priority;
|
||||
queue->vk.driver_submit = queue_submit;
|
||||
queue->type = TU_QUEUE_GFX;
|
||||
queue->vk.driver_submit =
|
||||
(type == TU_QUEUE_SPARSE) ? queue_submit_sparse : queue_submit;
|
||||
queue->type = type;
|
||||
|
||||
int ret = tu_drm_submitqueue_new(device, TU_QUEUE_GFX, priority, &queue->msm_queue_id);
|
||||
int ret = tu_drm_submitqueue_new(device, type, priority, &queue->msm_queue_id);
|
||||
if (ret)
|
||||
return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
|
||||
"submitqueue create failed");
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ VK_DEFINE_HANDLE_CASTS(tu_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
|
|||
VkResult
|
||||
tu_queue_init(struct tu_device *device,
|
||||
struct tu_queue *queue,
|
||||
enum tu_queue_type type,
|
||||
int idx,
|
||||
const VkDeviceQueueCreateInfo *create_info);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue