From a5f2c8c845e5722e2be0c8336cc5bcd21cfb0285 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date: Fri, 9 Jun 2023 14:22:58 -0700
Subject: [PATCH] anv: create individual logical engines on i915 when possible
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This enables us to create more logical engines than there are HW
engines available. This also brings the uAPI usage closer to what is
done on Xe.

Rework: (Sagar)
- Correct exec_flags at the time of submission
- Handle device status check
- Set queue parameters

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23661>
---
 src/intel/vulkan/anv_device.c           |   6 +-
 src/intel/vulkan/anv_private.h          |   6 ++
 src/intel/vulkan/i915/anv_batch_chain.c |  56 +++++++++--
 src/intel/vulkan/i915/anv_device.c      | 127 ++++++++++++++----------
 src/intel/vulkan/i915/anv_device.h      |   4 +
 src/intel/vulkan/i915/anv_queue.c       |  30 +++++-
 6 files changed, 167 insertions(+), 62 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index f8ef727981a..9dd15fa919c 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -60,6 +60,7 @@
 #include "vk_drm_syncobj.h"
 #include "common/intel_aux_map.h"
 #include "common/intel_uuid.h"
+#include "common/i915/intel_gem.h"
 #include "perf/intel_perf.h"
 
 #include "i915/anv_device.h"
@@ -3005,7 +3006,10 @@ anv_device_destroy_context_or_vm(struct anv_device *device)
 {
    switch (device->info->kmd_type) {
    case INTEL_KMD_TYPE_I915:
-      return intel_gem_destroy_context(device->fd, device->context_id);
+      if (device->physical->has_vm_control)
+         return anv_i915_device_destroy_vm(device);
+      else
+         return intel_gem_destroy_context(device->fd, device->context_id);
    case INTEL_KMD_TYPE_XE:
       return anv_xe_device_destroy_vm(device);
    default:
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index ede24a649c0..1fff2bd3e09 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -914,6 +914,9 @@ struct anv_physical_device {
     /** True if we can create protected contexts. */
     bool                                        has_protected_contexts;
 
+    /** Whether the i915 driver has the ability to create VM objects */
+    bool                                        has_vm_control;
+
     /**/
     bool                                        uses_ex_bso;
 
@@ -1068,6 +1071,7 @@ struct anv_queue {
 
    union {
       uint32_t                               exec_flags; /* i915 */
+      uint32_t                               context_id; /* i915 */
       uint32_t                               exec_queue_id; /* Xe */
    };
 
@@ -1761,6 +1765,8 @@ int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
 int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
+int anv_gem_set_context_param(int fd, uint32_t context, uint32_t param,
+                              uint64_t value);
 
 uint64_t anv_vma_alloc(struct anv_device *device,
                        uint64_t size, uint64_t align,
diff --git a/src/intel/vulkan/i915/anv_batch_chain.c b/src/intel/vulkan/i915/anv_batch_chain.c
index aee04c5093d..875b3abba6c 100644
--- a/src/intel/vulkan/i915/anv_batch_chain.c
+++ b/src/intel/vulkan/i915/anv_batch_chain.c
@@ -318,6 +318,21 @@ pin_state_pool(struct anv_device *device,
    return VK_SUCCESS;
 }
 
+static void
+get_context_and_exec_flags(struct anv_queue *queue,
+                           uint64_t *exec_flags,
+                           uint32_t *context_id)
+{
+   assert(queue != NULL);
+
+   struct anv_device *device = queue->device;
+   /* Submit to index 0 which is the main (CCS/BCS etc) virtual engine. */
+   *exec_flags = device->physical->has_vm_control ? 0 : queue->exec_flags;
+
+   *context_id = device->physical->has_vm_control ? queue->context_id :
+                                                    device->context_id;
+}
+
 static VkResult
 setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
                               struct anv_queue *queue,
@@ -427,6 +442,10 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
       anv_cmd_buffer_clflush(cmd_buffers, num_cmd_buffers);
 #endif
 
+   uint64_t exec_flags = 0;
+   uint32_t context_id;
+   get_context_and_exec_flags(queue, &exec_flags, &context_id);
+
    execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
       .buffers_ptr = (uintptr_t) execbuf->objects,
       .buffer_count = execbuf->bo_count,
@@ -439,8 +458,8 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
       .DR4 = 0,
       .flags = I915_EXEC_NO_RELOC |
                I915_EXEC_HANDLE_LUT |
-               queue->exec_flags,
-      .rsvd1 = device->context_id,
+               exec_flags,
+      .rsvd1 = context_id,
       .rsvd2 = 0,
    };
 
@@ -457,13 +476,17 @@ setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue)
    if (result != VK_SUCCESS)
       return result;
 
+   uint64_t exec_flags = 0;
+   uint32_t context_id;
+   get_context_and_exec_flags(queue, &exec_flags, &context_id);
+
    execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
       .buffers_ptr = (uintptr_t) execbuf->objects,
       .buffer_count = execbuf->bo_count,
       .batch_start_offset = 0,
       .batch_len = 8, /* GFX7_MI_BATCH_BUFFER_END and NOOP */
-      .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC,
-      .rsvd1 = device->context_id,
+      .flags = I915_EXEC_HANDLE_LUT | exec_flags | I915_EXEC_NO_RELOC,
+      .rsvd1 = context_id,
       .rsvd2 = 0,
    };
 
@@ -517,6 +540,10 @@ setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
       intel_flush_range(submit->batch_bo->map, submit->batch_bo->size);
 #endif
 
+   uint64_t exec_flags = 0;
+   uint32_t context_id;
+   get_context_and_exec_flags(queue, &exec_flags, &context_id);
+
    execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
       .buffers_ptr = (uintptr_t) execbuf->objects,
       .buffer_count = execbuf->bo_count,
@@ -525,8 +552,8 @@ setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
       .flags = I915_EXEC_NO_RELOC |
                I915_EXEC_HANDLE_LUT |
                I915_EXEC_FENCE_ARRAY |
-               queue->exec_flags,
-      .rsvd1 = device->context_id,
+               exec_flags,
+      .rsvd1 = context_id,
       .rsvd2 = 0,
       .num_cliprects = execbuf->syncobj_count,
       .cliprects_ptr = (uintptr_t)execbuf->syncobjs,
@@ -740,13 +767,18 @@ i915_queue_exec_locked(struct anv_queue *queue,
          .offset = pass_batch_bo->offset,
          .flags  = pass_batch_bo->flags,
       };
+
+      uint64_t exec_flags = 0;
+      uint32_t context_id;
+      get_context_and_exec_flags(queue, &exec_flags, &context_id);
+
       struct drm_i915_gem_execbuffer2 query_pass_execbuf = {
          .buffers_ptr = (uintptr_t) &query_pass_object,
          .buffer_count = 1,
          .batch_start_offset = khr_perf_query_preamble_offset(perf_query_pool,
                                                               perf_query_pass),
-         .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags,
-         .rsvd1 = device->context_id,
+         .flags = I915_EXEC_HANDLE_LUT | exec_flags,
+         .rsvd1 = context_id,
       };
 
       int ret = queue->device->info->no_hw ? 0 :
@@ -792,13 +824,17 @@ i915_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
    if (result != VK_SUCCESS)
       goto fail;
 
+   uint64_t exec_flags = 0;
+   uint32_t context_id;
+   get_context_and_exec_flags(queue, &exec_flags, &context_id);
+
    execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
       .buffers_ptr = (uintptr_t) execbuf.objects,
       .buffer_count = execbuf.bo_count,
       .batch_start_offset = 0,
       .batch_len = batch_bo_size,
-      .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC,
-      .rsvd1 = device->context_id,
+      .flags = I915_EXEC_HANDLE_LUT | exec_flags | I915_EXEC_NO_RELOC,
+      .rsvd1 = context_id,
       .rsvd2 = 0,
    };
 
diff --git a/src/intel/vulkan/i915/anv_device.c b/src/intel/vulkan/i915/anv_device.c
index e6e3a9e8825..d16dc3b4c0a 100644
--- a/src/intel/vulkan/i915/anv_device.c
+++ b/src/intel/vulkan/i915/anv_device.c
@@ -24,6 +24,7 @@
 #include "anv_private.h"
 
 #include "common/intel_defines.h"
+#include "common/i915/intel_gem.h"
 
 #include "drm-uapi/i915_drm.h"
 
@@ -44,7 +45,7 @@ vk_priority_to_i915(VkQueueGlobalPriorityKHR priority)
    }
 }
 
-static int
+int
 anv_gem_set_context_param(int fd, uint32_t context, uint32_t param, uint64_t value)
 {
    if (param == I915_CONTEXT_PARAM_PRIORITY)
@@ -68,6 +69,7 @@ anv_i915_physical_device_get_parameters(struct anv_physical_device *device)
 {
    VkResult result = VK_SUCCESS;
    int val, fd = device->local_fd;
+   uint64_t value;
 
    if (!intel_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT, &val) || !val) {
        result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
@@ -122,6 +124,9 @@ anv_i915_physical_device_get_parameters(struct anv_physical_device *device)
    if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES, &val))
       device->has_exec_timeline = val;
 
+   if (intel_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_VM, &value))
+      device->has_vm_control = value;
+
    return result;
 }
 
@@ -204,11 +209,52 @@ anv_i915_physical_device_init_memory_types(struct anv_physical_device *device)
    return VK_SUCCESS;
 }
 
+VkResult
+anv_i915_set_queue_parameters(
+      struct anv_device *device,
+      uint32_t context_id,
+      const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority)
+{
+   struct anv_physical_device *physical_device = device->physical;
+
+   /* Here we tell the kernel not to attempt to recover our context but
+    * immediately (on the next batchbuffer submission) report that the
+    * context is lost, and we will do the recovery ourselves.  In the case
+    * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting
+    * the client clean up the pieces.
+    */
+   anv_gem_set_context_param(device->fd, context_id,
+                             I915_CONTEXT_PARAM_RECOVERABLE, false);
+
+   VkQueueGlobalPriorityKHR priority =
+      queue_priority ? queue_priority->globalPriority :
+         VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
+
+   /* As per spec, the driver implementation may deny requests to acquire
+    * a priority above the default priority (MEDIUM) if the caller does not
+    * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR
+    * is returned.
+    */
+   if (physical_device->max_context_priority >= VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
+      int err = anv_gem_set_context_param(device->fd, context_id,
+                                          I915_CONTEXT_PARAM_PRIORITY,
+                                          priority);
+      if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
+         return vk_error(device, VK_ERROR_NOT_PERMITTED_KHR);
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
 VkResult
 anv_i915_device_setup_context(struct anv_device *device,
                               const VkDeviceCreateInfo *pCreateInfo,
                               const uint32_t num_queues)
 {
+   if (device->physical->has_vm_control)
+      return anv_i915_device_setup_vm(device);
+
    struct anv_physical_device *physical_device = device->physical;
    VkResult result = VK_SUCCESS;
 
@@ -232,7 +278,7 @@ anv_i915_device_setup_context(struct anv_device *device,
       if (!intel_gem_create_context_engines(device->fd, 0 /* flags */,
                                             physical_device->engine_info,
                                             engine_count, engine_classes,
-                                            0 /* vm_id */,
+                                            device->vm_id,
                                             (uint32_t *)&device->context_id))
          result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                             "kernel context creation failed");
@@ -245,38 +291,15 @@ anv_i915_device_setup_context(struct anv_device *device,
    if (result != VK_SUCCESS)
       return result;
 
-   /* Here we tell the kernel not to attempt to recover our context but
-    * immediately (on the next batchbuffer submission) report that the
-    * context is lost, and we will do the recovery ourselves.  In the case
-    * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting
-    * the client clean up the pieces.
-    */
-   anv_gem_set_context_param(device->fd, device->context_id,
-                             I915_CONTEXT_PARAM_RECOVERABLE, false);
-
    /* Check if client specified queue priority. */
    const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority =
       vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
                            DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
 
-   VkQueueGlobalPriorityKHR priority =
-      queue_priority ? queue_priority->globalPriority :
-         VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
-
-   /* As per spec, the driver implementation may deny requests to acquire
-    * a priority above the default priority (MEDIUM) if the caller does not
-    * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR
-    * is returned.
-    */
-   if (physical_device->max_context_priority >= VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
-      int err = anv_gem_set_context_param(device->fd, device->context_id,
-                                          I915_CONTEXT_PARAM_PRIORITY,
-                                          priority);
-      if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
-         result = vk_error(device, VK_ERROR_NOT_PERMITTED_KHR);
-         goto fail_context;
-      }
-   }
+   result = anv_i915_set_queue_parameters(device, device->context_id,
+                                          queue_priority);
+   if (result != VK_SUCCESS)
+      goto fail_context;
 
    return result;
 
@@ -285,42 +308,46 @@ fail_context:
    return result;
 }
 
-static int
-anv_gem_context_get_reset_stats(int fd, int context,
-                                uint32_t *active, uint32_t *pending)
+static VkResult
+anv_gem_context_get_reset_stats(struct anv_device *device, int context)
 {
    struct drm_i915_reset_stats stats = {
       .ctx_id = context,
    };
 
-   int ret = intel_ioctl(fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats);
-   if (ret == 0) {
-      *active = stats.batch_active;
-      *pending = stats.batch_pending;
+   int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats);
+   if (ret == -1) {
+      /* We don't know the real error. */
+      return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m");
    }
 
-   return ret;
+   if (stats.batch_active) {
+      return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers");
+   } else if (stats.batch_pending) {
+      return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight");
+   }
+
+   return VK_SUCCESS;
 }
 
 VkResult
 anv_i915_device_check_status(struct vk_device *vk_device)
 {
    struct anv_device *device = container_of(vk_device, struct anv_device, vk);
-   uint32_t active = 0, pending = 0;
-   int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id,
-                                             &active, &pending);
-   if (ret == -1) {
-      /* We don't know the real error. */
-      return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m");
+   VkResult result;
+
+   if (device->physical->has_vm_control) {
+      for (uint32_t i = 0; i < device->queue_count; i++) {
+         result = anv_gem_context_get_reset_stats(device,
+                                                  device->queues[i].context_id);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+   } else {
+      result = anv_gem_context_get_reset_stats(device, device->context_id);
    }
 
-   if (active) {
-      return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers");
-   } else if (pending) {
-      return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight");
-   }
-
-   return VK_SUCCESS;
+   return result;
 }
 
 bool
diff --git a/src/intel/vulkan/i915/anv_device.h b/src/intel/vulkan/i915/anv_device.h
index d77349c1cd1..0d871a41199 100644
--- a/src/intel/vulkan/i915/anv_device.h
+++ b/src/intel/vulkan/i915/anv_device.h
@@ -41,3 +41,7 @@ anv_i915_device_setup_context(struct anv_device *device,
 VkResult anv_i915_device_check_status(struct vk_device *vk_device);
 bool anv_i915_device_destroy_vm(struct anv_device *device);
 VkResult anv_i915_device_setup_vm(struct anv_device *device);
+VkResult anv_i915_set_queue_parameters(
+      struct anv_device *device,
+      uint32_t context_id,
+      const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority);
diff --git a/src/intel/vulkan/i915/anv_queue.c b/src/intel/vulkan/i915/anv_queue.c
index 28e02e90466..59c918be234 100644
--- a/src/intel/vulkan/i915/anv_queue.c
+++ b/src/intel/vulkan/i915/anv_queue.c
@@ -55,6 +55,33 @@ anv_i915_create_engine(struct anv_device *device,
       default:
          unreachable("Unsupported legacy engine");
       }
+   } else if (device->physical->has_vm_control) {
+      assert(pCreateInfo->queueFamilyIndex < physical->queue.family_count);
+      enum intel_engine_class engine_classes[2];
+      int engine_count = 0;
+
+      engine_classes[engine_count++] = queue_family->engine_class;
+
+      if (!intel_gem_create_context_engines(device->fd, 0 /* flags */,
+                                            physical->engine_info,
+                                            engine_count, engine_classes,
+                                            device->vm_id,
+                                            (uint32_t *)&queue->context_id))
+         return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
+                          "engine creation failed");
+
+      /* Check if client specified queue priority. */
+      const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority =
+         vk_find_struct_const(pCreateInfo->pNext,
+                              DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
+
+      VkResult result = anv_i915_set_queue_parameters(device,
+                                                      queue->context_id,
+                                                      queue_priority);
+      if (result != VK_SUCCESS) {
+         intel_gem_destroy_context(device->fd, queue->context_id);
+         return result;
+      }
    } else {
       /* When using the new engine creation uAPI, the exec_flags value is the
        * index of the engine in the group specified at GEM context creation.
@@ -68,5 +95,6 @@ anv_i915_create_engine(struct anv_device *device,
 void
 anv_i915_destroy_engine(struct anv_device *device, struct anv_queue *queue)
 {
-   /* NO-OP */
+   if (device->physical->has_vm_control)
+      intel_gem_destroy_context(device->fd, queue->context_id);
 }