From e394244a89114c90175f92f1b72e4839d156ab28 Mon Sep 17 00:00:00 2001 From: David Rosca Date: Thu, 31 Jul 2025 14:56:55 +0200 Subject: [PATCH] radv: Use extra context for video encode queue with multiple VCN instances amdgpu does VCN instance scheduling per context, so we need to use different context in order to allow decode to run on one instance and encode on the other one. Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_device.c | 18 ++++++++++++++++++ src/amd/vulkan/radv_device.h | 2 ++ src/amd/vulkan/radv_queue.c | 6 +++++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 849cbb03ac4..e18a077c5fc 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1126,6 +1126,8 @@ radv_destroy_device(struct radv_device *device, const VkAllocationCallbacks *pAl if (device->hw_ctx[i]) device->ws->ctx_destroy(device->hw_ctx[i]); } + if (device->hw_vcn_enc_ctx) + device->ws->ctx_destroy(device->hw_vcn_enc_ctx); mtx_destroy(&device->overallocation_mutex); simple_mtx_destroy(&device->ctx_roll_mtx); @@ -1217,12 +1219,21 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr if (pdev->info.has_kernelq_reg_shadowing || instance->debug_flags & RADV_DEBUG_SHADOW_REGS) device->uses_shadow_regs = true; + bool video_dec_queue = false; + bool video_enc_queue = false; + /* Create one context per queue priority. 
*/ for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i]; const VkDeviceQueueGlobalPriorityCreateInfo *global_priority = vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO); enum radeon_ctx_priority priority = radv_get_queue_global_priority(global_priority); + enum radv_queue_family qf = vk_queue_to_radv(pdev, queue_create->queueFamilyIndex); + + if (qf == RADV_QUEUE_VIDEO_DEC) + video_dec_queue = true; + else if (qf == RADV_QUEUE_VIDEO_ENC) + video_enc_queue = true; if (device->hw_ctx[priority]) continue; @@ -1232,6 +1243,13 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr goto fail; } + /* Use extra context to allow use of both VCN instances for transcoding. */ + if (video_dec_queue && video_enc_queue && pdev->info.ip[AMD_IP_VCN_ENC].num_instances > 1) { + result = device->ws->ctx_create(device->ws, RADEON_CTX_PRIORITY_MEDIUM, &device->hw_vcn_enc_ctx); + if (result != VK_SUCCESS) + goto fail; /* same cleanup path as the per-priority contexts above; a bare return would skip it */ + } + for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i]; uint32_t qfi = queue_create->queueFamilyIndex; diff --git a/src/amd/vulkan/radv_device.h b/src/amd/vulkan/radv_device.h index 86914f6265c..a57a98093a4 100644 --- a/src/amd/vulkan/radv_device.h +++ b/src/amd/vulkan/radv_device.h @@ -148,6 +148,8 @@ struct radv_device { struct radv_layer_dispatch_tables layer_dispatch; struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX]; + struct radeon_winsys_ctx *hw_vcn_enc_ctx; + struct radv_meta_state meta_state; struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES]; diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c index 9a3cca2af8f..4d626711aeb 100644 --- a/src/amd/vulkan/radv_queue.c +++ b/src/amd/vulkan/radv_queue.c @@ -2017,9 +2017,13 @@ radv_queue_init(struct radv_device *device, struct radv_queue 
*queue, int idx, const struct radv_physical_device *pdev = radv_device_physical(device); queue->priority = radv_get_queue_global_priority(global_priority); - queue->hw_ctx = device->hw_ctx[queue->priority]; queue->state.qf = vk_queue_to_radv(pdev, create_info->queueFamilyIndex); + if (queue->state.qf == RADV_QUEUE_VIDEO_ENC && device->hw_vcn_enc_ctx) + queue->hw_ctx = device->hw_vcn_enc_ctx; + else + queue->hw_ctx = device->hw_ctx[queue->priority]; + VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx); if (result != VK_SUCCESS) return result;