From a5f2c8c845e5722e2be0c8336cc5bcd21cfb0285 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 9 Jun 2023 14:22:58 -0700 Subject: [PATCH] anv: create individual logical engines on i915 when possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables us to create more logical engines than there are HW engines available. This also brings the uAPI usage closer to what is happening on Xe. Rework: (Sagar) - Correct exec_flag at the time of submission - Handle device status check - Set queue parameters Signed-off-by: Lionel Landwerlin Reviewed-by: Sagar Ghuge Reviewed-by: José Roberto de Souza Part-of: --- src/intel/vulkan/anv_device.c | 6 +- src/intel/vulkan/anv_private.h | 6 ++ src/intel/vulkan/i915/anv_batch_chain.c | 56 +++++++++-- src/intel/vulkan/i915/anv_device.c | 127 ++++++++++++++---------- src/intel/vulkan/i915/anv_device.h | 4 + src/intel/vulkan/i915/anv_queue.c | 30 +++++- 6 files changed, 167 insertions(+), 62 deletions(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index f8ef727981a..9dd15fa919c 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -60,6 +60,7 @@ #include "vk_drm_syncobj.h" #include "common/intel_aux_map.h" #include "common/intel_uuid.h" +#include "common/i915/intel_gem.h" #include "perf/intel_perf.h" #include "i915/anv_device.h" @@ -3005,7 +3006,10 @@ anv_device_destroy_context_or_vm(struct anv_device *device) { switch (device->info->kmd_type) { case INTEL_KMD_TYPE_I915: - return intel_gem_destroy_context(device->fd, device->context_id); + if (device->physical->has_vm_control) + return anv_i915_device_destroy_vm(device); + else + return intel_gem_destroy_context(device->fd, device->context_id); case INTEL_KMD_TYPE_XE: return anv_xe_device_destroy_vm(device); default: diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index ede24a649c0..1fff2bd3e09 100644 --- a/src/intel/vulkan/anv_private.h +++ 
b/src/intel/vulkan/anv_private.h @@ -914,6 +914,9 @@ struct anv_physical_device { /** True if we can create protected contexts. */ bool has_protected_contexts; + /** Whether the i915 driver has the ability to create VM objects */ + bool has_vm_control; + /**/ bool uses_ex_bso; @@ -1068,6 +1071,7 @@ struct anv_queue { union { uint32_t exec_flags; /* i915 */ + uint32_t context_id; /* i915 */ uint32_t exec_queue_id; /* Xe */ }; @@ -1761,6 +1765,8 @@ int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle); int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle); uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd); int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching); +int anv_gem_set_context_param(int fd, uint32_t context, uint32_t param, + uint64_t value); uint64_t anv_vma_alloc(struct anv_device *device, uint64_t size, uint64_t align, diff --git a/src/intel/vulkan/i915/anv_batch_chain.c b/src/intel/vulkan/i915/anv_batch_chain.c index aee04c5093d..875b3abba6c 100644 --- a/src/intel/vulkan/i915/anv_batch_chain.c +++ b/src/intel/vulkan/i915/anv_batch_chain.c @@ -318,6 +318,21 @@ pin_state_pool(struct anv_device *device, return VK_SUCCESS; } +static void +get_context_and_exec_flags(struct anv_queue *queue, + uint64_t *exec_flags, + uint32_t *context_id) +{ + assert(queue != NULL); + + struct anv_device *device = queue->device; + /* Submit to index 0 which is the main (CCS/BCS etc) virtual engine. */ + *exec_flags = device->physical->has_vm_control ? 0 : queue->exec_flags; + + *context_id = device->physical->has_vm_control ? 
queue->context_id : + device->context_id; +} + static VkResult setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf, struct anv_queue *queue, @@ -427,6 +442,10 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf, anv_cmd_buffer_clflush(cmd_buffers, num_cmd_buffers); #endif + uint64_t exec_flags = 0; + uint32_t context_id; + get_context_and_exec_flags(queue, &exec_flags, &context_id); + execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { .buffers_ptr = (uintptr_t) execbuf->objects, .buffer_count = execbuf->bo_count, @@ -439,8 +458,8 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf, .DR4 = 0, .flags = I915_EXEC_NO_RELOC | I915_EXEC_HANDLE_LUT | - queue->exec_flags, - .rsvd1 = device->context_id, + exec_flags, + .rsvd1 = context_id, .rsvd2 = 0, }; @@ -457,13 +476,17 @@ setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue) if (result != VK_SUCCESS) return result; + uint64_t exec_flags = 0; + uint32_t context_id; + get_context_and_exec_flags(queue, &exec_flags, &context_id); + execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { .buffers_ptr = (uintptr_t) execbuf->objects, .buffer_count = execbuf->bo_count, .batch_start_offset = 0, .batch_len = 8, /* GFX7_MI_BATCH_BUFFER_END and NOOP */ - .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC, - .rsvd1 = device->context_id, + .flags = I915_EXEC_HANDLE_LUT | exec_flags | I915_EXEC_NO_RELOC, + .rsvd1 = context_id, .rsvd2 = 0, }; @@ -517,6 +540,10 @@ setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue, intel_flush_range(submit->batch_bo->map, submit->batch_bo->size); #endif + uint64_t exec_flags = 0; + uint32_t context_id; + get_context_and_exec_flags(queue, &exec_flags, &context_id); + execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { .buffers_ptr = (uintptr_t) execbuf->objects, .buffer_count = execbuf->bo_count, @@ -525,8 +552,8 @@ setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue, .flags = 
I915_EXEC_NO_RELOC | I915_EXEC_HANDLE_LUT | I915_EXEC_FENCE_ARRAY | - queue->exec_flags, - .rsvd1 = device->context_id, + exec_flags, + .rsvd1 = context_id, .rsvd2 = 0, .num_cliprects = execbuf->syncobj_count, .cliprects_ptr = (uintptr_t)execbuf->syncobjs, @@ -740,13 +767,18 @@ i915_queue_exec_locked(struct anv_queue *queue, .offset = pass_batch_bo->offset, .flags = pass_batch_bo->flags, }; + + uint64_t exec_flags = 0; + uint32_t context_id; + get_context_and_exec_flags(queue, &exec_flags, &context_id); + struct drm_i915_gem_execbuffer2 query_pass_execbuf = { .buffers_ptr = (uintptr_t) &query_pass_object, .buffer_count = 1, .batch_start_offset = khr_perf_query_preamble_offset(perf_query_pool, perf_query_pass), - .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags, - .rsvd1 = device->context_id, + .flags = I915_EXEC_HANDLE_LUT | exec_flags, + .rsvd1 = context_id, }; int ret = queue->device->info->no_hw ? 0 : @@ -792,13 +824,17 @@ i915_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo, if (result != VK_SUCCESS) goto fail; + uint64_t exec_flags = 0; + uint32_t context_id; + get_context_and_exec_flags(queue, &exec_flags, &context_id); + execbuf.execbuf = (struct drm_i915_gem_execbuffer2) { .buffers_ptr = (uintptr_t) execbuf.objects, .buffer_count = execbuf.bo_count, .batch_start_offset = 0, .batch_len = batch_bo_size, - .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC, - .rsvd1 = device->context_id, + .flags = I915_EXEC_HANDLE_LUT | exec_flags | I915_EXEC_NO_RELOC, + .rsvd1 = context_id, .rsvd2 = 0, }; diff --git a/src/intel/vulkan/i915/anv_device.c b/src/intel/vulkan/i915/anv_device.c index e6e3a9e8825..d16dc3b4c0a 100644 --- a/src/intel/vulkan/i915/anv_device.c +++ b/src/intel/vulkan/i915/anv_device.c @@ -24,6 +24,7 @@ #include "anv_private.h" #include "common/intel_defines.h" +#include "common/i915/intel_gem.h" #include "drm-uapi/i915_drm.h" @@ -44,7 +45,7 @@ vk_priority_to_i915(VkQueueGlobalPriorityKHR priority) } } -static 
int +int anv_gem_set_context_param(int fd, uint32_t context, uint32_t param, uint64_t value) { if (param == I915_CONTEXT_PARAM_PRIORITY) @@ -68,6 +69,7 @@ anv_i915_physical_device_get_parameters(struct anv_physical_device *device) { VkResult result = VK_SUCCESS; int val, fd = device->local_fd; + uint64_t value; if (!intel_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT, &val) || !val) { result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, @@ -122,6 +124,9 @@ anv_i915_physical_device_get_parameters(struct anv_physical_device *device) if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES, &val)) device->has_exec_timeline = val; + if (intel_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_VM, &value)) + device->has_vm_control = value; + return result; } @@ -204,11 +209,52 @@ anv_i915_physical_device_init_memory_types(struct anv_physical_device *device) return VK_SUCCESS; } +VkResult +anv_i915_set_queue_parameters( + struct anv_device *device, + uint32_t context_id, + const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority) +{ + struct anv_physical_device *physical_device = device->physical; + + /* Here we tell the kernel not to attempt to recover our context but + * immediately (on the next batchbuffer submission) report that the + * context is lost, and we will do the recovery ourselves. In the case + * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting + * the client clean up the pieces. + */ + anv_gem_set_context_param(device->fd, context_id, + I915_CONTEXT_PARAM_RECOVERABLE, false); + + VkQueueGlobalPriorityKHR priority = + queue_priority ? queue_priority->globalPriority : + VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR; + + /* As per spec, the driver implementation may deny requests to acquire + * a priority above the default priority (MEDIUM) if the caller does not + * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR + * is returned. 
+ */ + if (physical_device->max_context_priority >= VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) { + int err = anv_gem_set_context_param(device->fd, context_id, + I915_CONTEXT_PARAM_PRIORITY, + priority); + if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) { + return vk_error(device, VK_ERROR_NOT_PERMITTED_KHR); + } + } + + return VK_SUCCESS; +} + VkResult anv_i915_device_setup_context(struct anv_device *device, const VkDeviceCreateInfo *pCreateInfo, const uint32_t num_queues) { + if (device->physical->has_vm_control) + return anv_i915_device_setup_vm(device); + struct anv_physical_device *physical_device = device->physical; VkResult result = VK_SUCCESS; @@ -232,7 +278,7 @@ anv_i915_device_setup_context(struct anv_device *device, if (!intel_gem_create_context_engines(device->fd, 0 /* flags */, physical_device->engine_info, engine_count, engine_classes, - 0 /* vm_id */, + device->vm_id, (uint32_t *)&device->context_id)) result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, "kernel context creation failed"); @@ -245,38 +291,15 @@ anv_i915_device_setup_context(struct anv_device *device, if (result != VK_SUCCESS) return result; - /* Here we tell the kernel not to attempt to recover our context but - * immediately (on the next batchbuffer submission) report that the - * context is lost, and we will do the recovery ourselves. In the case - * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting - * the client clean up the pieces. - */ - anv_gem_set_context_param(device->fd, device->context_id, - I915_CONTEXT_PARAM_RECOVERABLE, false); - /* Check if client specified queue priority. */ const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority = vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR); - VkQueueGlobalPriorityKHR priority = - queue_priority ? 
queue_priority->globalPriority : - VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR; - - /* As per spec, the driver implementation may deny requests to acquire - * a priority above the default priority (MEDIUM) if the caller does not - * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR - * is returned. - */ - if (physical_device->max_context_priority >= VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) { - int err = anv_gem_set_context_param(device->fd, device->context_id, - I915_CONTEXT_PARAM_PRIORITY, - priority); - if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) { - result = vk_error(device, VK_ERROR_NOT_PERMITTED_KHR); - goto fail_context; - } - } + result = anv_i915_set_queue_parameters(device, device->context_id, + queue_priority); + if (result != VK_SUCCESS) + goto fail_context; return result; @@ -285,42 +308,46 @@ fail_context: return result; } -static int -anv_gem_context_get_reset_stats(int fd, int context, - uint32_t *active, uint32_t *pending) +static VkResult +anv_gem_context_get_reset_stats(struct anv_device *device, int context) { struct drm_i915_reset_stats stats = { .ctx_id = context, }; - int ret = intel_ioctl(fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats); - if (ret == 0) { - *active = stats.batch_active; - *pending = stats.batch_pending; + int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats); + if (ret == -1) { + /* We don't know the real error. 
*/ + return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m"); } - return ret; + if (stats.batch_active) { + return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers"); + } else if (stats.batch_pending) { + return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight"); + } + + return VK_SUCCESS; } VkResult anv_i915_device_check_status(struct vk_device *vk_device) { struct anv_device *device = container_of(vk_device, struct anv_device, vk); - uint32_t active = 0, pending = 0; - int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id, - &active, &pending); - if (ret == -1) { - /* We don't know the real error. */ - return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m"); + VkResult result; + + if (device->physical->has_vm_control) { + for (uint32_t i = 0; i < device->queue_count; i++) { + result = anv_gem_context_get_reset_stats(device, + device->queues[i].context_id); + if (result != VK_SUCCESS) + return result; + } + } else { + result = anv_gem_context_get_reset_stats(device, device->context_id); } - if (active) { - return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers"); - } else if (pending) { - return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight"); - } - - return VK_SUCCESS; + return result; } bool diff --git a/src/intel/vulkan/i915/anv_device.h b/src/intel/vulkan/i915/anv_device.h index d77349c1cd1..0d871a41199 100644 --- a/src/intel/vulkan/i915/anv_device.h +++ b/src/intel/vulkan/i915/anv_device.h @@ -41,3 +41,7 @@ anv_i915_device_setup_context(struct anv_device *device, VkResult anv_i915_device_check_status(struct vk_device *vk_device); bool anv_i915_device_destroy_vm(struct anv_device *device); VkResult anv_i915_device_setup_vm(struct anv_device *device); +VkResult anv_i915_set_queue_parameters( + struct anv_device *device, + uint32_t context_id, + const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority); diff --git 
a/src/intel/vulkan/i915/anv_queue.c b/src/intel/vulkan/i915/anv_queue.c index 28e02e90466..59c918be234 100644 --- a/src/intel/vulkan/i915/anv_queue.c +++ b/src/intel/vulkan/i915/anv_queue.c @@ -55,6 +55,33 @@ anv_i915_create_engine(struct anv_device *device, default: unreachable("Unsupported legacy engine"); } + } else if (device->physical->has_vm_control) { + assert(pCreateInfo->queueFamilyIndex < physical->queue.family_count); + enum intel_engine_class engine_classes[2]; + int engine_count = 0; + + engine_classes[engine_count++] = queue_family->engine_class; + + if (!intel_gem_create_context_engines(device->fd, 0 /* flags */, + physical->engine_info, + engine_count, engine_classes, + device->vm_id, + (uint32_t *)&queue->context_id)) + return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, + "engine creation failed"); + + /* Check if client specified queue priority. */ + const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority = + vk_find_struct_const(pCreateInfo->pNext, + DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR); + + VkResult result = anv_i915_set_queue_parameters(device, + queue->context_id, + queue_priority); + if (result != VK_SUCCESS) { + intel_gem_destroy_context(device->fd, queue->context_id); + return result; + } } else { /* When using the new engine creation uAPI, the exec_flags value is the * index of the engine in the group specified at GEM context creation. @@ -68,5 +95,6 @@ anv_i915_create_engine(struct anv_device *device, void anv_i915_destroy_engine(struct anv_device *device, struct anv_queue *queue) { - /* NO-OP */ + if (device->physical->has_vm_control) + intel_gem_destroy_context(device->fd, queue->context_id); }