radv: Use extra context for video encode queue with multiple VCN instances

amdgpu does VCN instance scheduling per context, so we need to use a
different context for encode in order to allow decode to run on one
instance and encode on the other one.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36642>
This commit is contained in:
David Rosca 2025-07-31 14:56:55 +02:00 committed by Marge Bot
parent 2667db1114
commit e394244a89
3 changed files with 25 additions and 1 deletions

View file

@ -1126,6 +1126,8 @@ radv_destroy_device(struct radv_device *device, const VkAllocationCallbacks *pAl
if (device->hw_ctx[i])
device->ws->ctx_destroy(device->hw_ctx[i]);
}
if (device->hw_vcn_enc_ctx)
device->ws->ctx_destroy(device->hw_vcn_enc_ctx);
mtx_destroy(&device->overallocation_mutex);
simple_mtx_destroy(&device->ctx_roll_mtx);
@ -1217,12 +1219,21 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
if (pdev->info.has_kernelq_reg_shadowing || instance->debug_flags & RADV_DEBUG_SHADOW_REGS)
device->uses_shadow_regs = true;
bool video_dec_queue = false;
bool video_enc_queue = false;
/* Create one context per queue priority. */
for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
const VkDeviceQueueGlobalPriorityCreateInfo *global_priority =
vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO);
enum radeon_ctx_priority priority = radv_get_queue_global_priority(global_priority);
enum radv_queue_family qf = vk_queue_to_radv(pdev, queue_create->queueFamilyIndex);
if (qf == RADV_QUEUE_VIDEO_DEC)
video_dec_queue = true;
else if (qf == RADV_QUEUE_VIDEO_ENC)
video_enc_queue = true;
if (device->hw_ctx[priority])
continue;
@ -1232,6 +1243,13 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
goto fail;
}
/* Use extra context to allow use of both VCN instances for transcoding.
 *
 * The extra context is only created when the application requested both a
 * video decode and a video encode queue and the hardware reports more than
 * one VCN encode instance; otherwise hw_vcn_enc_ctx is left untouched.
 */
if (video_dec_queue && video_enc_queue && pdev->info.ip[AMD_IP_VCN_ENC].num_instances > 1) {
   result = device->ws->ctx_create(device->ws, RADEON_CTX_PRIORITY_MEDIUM, &device->hw_vcn_enc_ctx);
   /* Leave through the function's shared `fail` label, like the earlier error
    * exit in this function, instead of returning directly: a bare return here
    * would skip whatever cleanup that path performs (presumably tearing down
    * the partially initialized device and the contexts created above).
    */
   if (result != VK_SUCCESS)
      goto fail;
}
for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
uint32_t qfi = queue_create->queueFamilyIndex;

View file

@ -148,6 +148,8 @@ struct radv_device {
struct radv_layer_dispatch_tables layer_dispatch;
struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
struct radeon_winsys_ctx *hw_vcn_enc_ctx;
struct radv_meta_state meta_state;
struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];

View file

@ -2017,9 +2017,13 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
const struct radv_physical_device *pdev = radv_device_physical(device);
queue->priority = radv_get_queue_global_priority(global_priority);
queue->hw_ctx = device->hw_ctx[queue->priority];
queue->state.qf = vk_queue_to_radv(pdev, create_info->queueFamilyIndex);
if (queue->state.qf == RADV_QUEUE_VIDEO_ENC && device->hw_vcn_enc_ctx)
queue->hw_ctx = device->hw_vcn_enc_ctx;
else
queue->hw_ctx = device->hw_ctx[queue->priority];
VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
if (result != VK_SUCCESS)
return result;