From 40b67a292297606f0a7576e3ef4087028d5edd17 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date: Sat, 19 Jun 2021 13:14:16 +0300
Subject: [PATCH] anv: allocate bigger batches as we grow command buffers

This is the first time we see an application running out of mmap().

We essentially allocate too many batches (+65k) and end up not being
able to mmap them, at which point we can't mmap anything anymore and
things go sideways.

This change allocates bigger batch BOs as we grow an existing command
buffer. This drastically reduces the number of BOs we need to allocate
(the benchmark that reported the issue now reaches a max of ~630 BOs,
instead of reaching 65k and failing previously).

v2: Track the total batch size of command buffers (Jason)
    Just give 0 for batch_len to i915 (Jason)

v3: Fix indentation (Jason)

v4: Drop uncessary reshuffling of error labels (Jason)

v5: Remove empty lines (Marcin)

v6: Limit BO growing to chunks of 16Mb (Jason)

v7: Add assert on initial size (Jason)

v8: Add define for max size (Jason)

v9: Fixup v7 assert for non softpin platforms (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4956
Cc: mesa-stable
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11482>
---
 src/intel/vulkan/anv_allocator.c   |  4 +++-
 src/intel/vulkan/anv_batch_chain.c | 37 +++++++++++++++++++++++-------
 src/intel/vulkan/anv_private.h     |  9 +++++++-
 3 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index 1be8f02247b..88402b733a3 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -1756,7 +1756,9 @@ anv_device_alloc_bo(struct anv_device *device,
       new_bo.map = anv_gem_mmap(device, new_bo.gem_handle, 0, size, 0);
       if (new_bo.map == MAP_FAILED) {
          anv_gem_close(device, new_bo.gem_handle);
-         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+         return vk_errorf(device, &device->vk.base,
+                          VK_ERROR_OUT_OF_HOST_MEMORY,
+                          "mmap failed: %m");
       }
    }
 
diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
index a4a71d3c906..f4cdc118d7b 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -341,6 +341,7 @@ anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
 
 static VkResult
 anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer,
+                    uint32_t size,
                     struct anv_batch_bo **bbo_out)
 {
    VkResult result;
@@ -351,7 +352,7 @@ anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer,
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
    result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool,
-                              ANV_CMD_BUFFER_BATCH_SIZE, &bbo->bo);
+                              size, &bbo->bo);
    if (result != VK_SUCCESS)
       goto fail_alloc;
 
@@ -662,11 +663,16 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
 {
    struct anv_cmd_buffer *cmd_buffer = _data;
    struct anv_batch_bo *new_bbo;
+   /* Cap reallocation to chunk. */
+   uint32_t alloc_size = MIN2(cmd_buffer->total_batch_size,
+                              ANV_MAX_CMD_BUFFER_BATCH_SIZE);
 
-   VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo);
+   VkResult result = anv_batch_bo_create(cmd_buffer, alloc_size, &new_bbo);
    if (result != VK_SUCCESS)
       return result;
 
+   cmd_buffer->total_batch_size += alloc_size;
+
    struct anv_batch_bo **seen_bbo = u_vector_add(&cmd_buffer->seen_bbos);
    if (seen_bbo == NULL) {
       anv_batch_bo_destroy(new_bbo, cmd_buffer);
@@ -833,7 +839,11 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
 
    list_inithead(&cmd_buffer->batch_bos);
 
-   result = anv_batch_bo_create(cmd_buffer, &batch_bo);
+   cmd_buffer->total_batch_size = ANV_MIN_CMD_BUFFER_BATCH_SIZE;
+
+   result = anv_batch_bo_create(cmd_buffer,
+                                cmd_buffer->total_batch_size,
+                                &batch_bo);
    if (result != VK_SUCCESS)
       return result;
 
@@ -939,8 +949,14 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
    cmd_buffer->seen_bbos.head = 0;
    cmd_buffer->seen_bbos.tail = 0;
 
-   *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) =
-      anv_cmd_buffer_current_batch_bo(cmd_buffer);
+   struct anv_batch_bo *first_bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
+
+   *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) = first_bbo;
+
+
+   assert(!cmd_buffer->device->can_chain_batches ||
+          first_bbo->bo->size == ANV_MIN_CMD_BUFFER_BATCH_SIZE);
+   cmd_buffer->total_batch_size = first_bbo->bo->size;
 }
 
 void
@@ -1018,7 +1034,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
           */
          batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
       } else if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) &&
-                 (length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) {
+                 (length < ANV_MIN_CMD_BUFFER_BATCH_SIZE / 2)) {
          /* If the secondary has exactly one batch buffer in its list *and*
           * that batch buffer is less than half of the maximum size, we're
           * probably better of simply copying it into our batch.
@@ -1793,7 +1809,11 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
       .buffers_ptr = (uintptr_t) execbuf->objects,
       .buffer_count = execbuf->bo_count,
       .batch_start_offset = 0,
-      .batch_len = batch->next - batch->start,
+      /* On platforms that cannot chain batch buffers because of the i915
+       * command parser, we have to provide the batch length. Everywhere else
+       * we'll chain batches so no point in passing a length.
+       */
+      .batch_len = device->can_chain_batches ? 0 : batch->next - batch->start,
       .cliprects_ptr = 0,
       .num_cliprects = 0,
       .DR1 = 0,
@@ -1913,7 +1933,8 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
       submit->perf_query_pool;
 
    if (INTEL_DEBUG & DEBUG_SUBMIT) {
-      fprintf(stderr, "Batch on queue 0\n");
+      fprintf(stderr, "Batch offset=0x%x len=0x%x on queue 0\n",
+              execbuf.execbuf.batch_start_offset, execbuf.execbuf.batch_len);
       for (uint32_t i = 0; i < execbuf.bo_count; i++) {
          const struct anv_bo *bo = execbuf.bos[i];
 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 932bf8b2e58..d380d6c52e2 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -3055,7 +3055,8 @@ struct anv_cmd_pool {
    VkCommandPoolCreateFlags                     flags;
 };
 
-#define ANV_CMD_BUFFER_BATCH_SIZE 8192
+#define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192
+#define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024)
 
 enum anv_cmd_buffer_exec_mode {
    ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
@@ -3144,6 +3145,12 @@ struct anv_cmd_buffer {
     * used.
     */
    uint32_t                                      perf_reloc_idx;
+
+   /**
+    * Sum of all the anv_batch_bo sizes allocated for this command buffer.
+    * Used to increase allocation size for long command buffers.
+    */
+   uint32_t                                     total_batch_size;
 };
 
 /* Determine whether we can chain a given cmd_buffer to another one. We need