anv: fail pool allocation when over the maximal size

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25955>
This commit is contained in:
Lionel Landwerlin 2023-10-12 12:02:55 +03:00 committed by Marge Bot
parent 8fc42d83be
commit 8d813a90d6
8 changed files with 273 additions and 36 deletions

View file

@ -352,13 +352,15 @@ anv_block_pool_init(struct anv_block_pool *pool,
const char *name,
uint64_t start_address,
uint32_t initial_size,
uint64_t max_size)
uint32_t max_size)
{
VkResult result;
/* Make sure VMA addresses are aligned for the block pool */
assert(anv_is_aligned(start_address, device->info->mem_alignment));
assert(anv_is_aligned(initial_size, device->info->mem_alignment));
assert(max_size > 0);
assert(max_size > initial_size);
pool->name = name;
pool->device = device;
@ -540,11 +542,14 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state,
*/
required = MAX2(required, old_size + contiguous_size);
if (total_used * 2 > required) {
if (required > pool->max_size) {
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
} else if (total_used * 2 > required) {
uint32_t size = old_size * 2;
while (size < required)
size *= 2;
size = MIN2(size, pool->max_size);
assert(size > pool->size);
result = anv_block_pool_expand_range(pool, size);
@ -562,10 +567,12 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state,
return pool->size;
}
static uint32_t
static VkResult
anv_block_pool_alloc_new(struct anv_block_pool *pool,
struct anv_block_state *pool_state,
uint32_t block_size, uint32_t *padding)
uint32_t block_size,
int64_t *offset,
uint32_t *padding)
{
struct anv_block_state state, old, new;
@ -575,8 +582,11 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool,
while (1) {
state.u64 = __sync_fetch_and_add(&pool_state->u64, block_size);
if (state.next + block_size <= state.end) {
return state.next;
if (state.next + block_size > pool->max_size) {
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
} else if (state.next + block_size <= state.end) {
*offset = state.next;
return VK_SUCCESS;
} else if (state.next <= state.end) {
if (state.next < state.end) {
/* We need to grow the block pool, but still have some leftover
@ -602,12 +612,17 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool,
new.next = state.next + block_size;
do {
new.end = anv_block_pool_grow(pool, pool_state, block_size);
if (pool->size > 0 && new.end == 0) {
futex_wake(&pool_state->end, INT_MAX);
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
} while (new.end < new.next);
old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64);
if (old.next != state.next)
futex_wake(&pool_state->end, INT_MAX);
return state.next;
*offset = state.next;
return VK_SUCCESS;
} else {
futex_wait(&pool_state->end, state.end, NULL);
continue;
@ -615,15 +630,12 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool,
}
}
int32_t
VkResult
anv_block_pool_alloc(struct anv_block_pool *pool,
uint32_t block_size, uint32_t *padding)
uint32_t block_size,
int64_t *offset, uint32_t *padding)
{
uint32_t offset;
offset = anv_block_pool_alloc_new(pool, &pool->state, block_size, padding);
return offset;
return anv_block_pool_alloc_new(pool, &pool->state, block_size, offset, padding);
}
VkResult
@ -670,15 +682,15 @@ anv_state_pool_finish(struct anv_state_pool *pool)
anv_block_pool_finish(&pool->block_pool);
}
static uint32_t
static VkResult
anv_fixed_size_state_pool_alloc_new(struct anv_fixed_size_state_pool *pool,
struct anv_block_pool *block_pool,
uint32_t state_size,
uint32_t block_size,
int64_t *offset,
uint32_t *padding)
{
struct anv_block_state block, old, new;
uint32_t offset;
/* We don't always use anv_block_pool_alloc(), which would set *padding to
* zero for us. So if we have a pointer to padding, we must zero it out
@ -691,21 +703,25 @@ anv_fixed_size_state_pool_alloc_new(struct anv_fixed_size_state_pool *pool,
* Instead, we just grab whole (potentially large) blocks.
*/
if (state_size >= block_size)
return anv_block_pool_alloc(block_pool, state_size, padding);
return anv_block_pool_alloc(block_pool, state_size, offset, padding);
restart:
block.u64 = __sync_fetch_and_add(&pool->block.u64, state_size);
if (block.next < block.end) {
return block.next;
*offset = block.next;
return VK_SUCCESS;
} else if (block.next == block.end) {
offset = anv_block_pool_alloc(block_pool, block_size, padding);
new.next = offset + state_size;
new.end = offset + block_size;
VkResult result = anv_block_pool_alloc(block_pool, block_size,
offset, padding);
if (result != VK_SUCCESS)
return result;
new.next = *offset + state_size;
new.end = *offset + block_size;
old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64);
if (old.next != block.next)
futex_wake(&pool->block.end, INT_MAX);
return offset;
return result;
} else {
futex_wait(&pool->block.end, block.end, NULL);
goto restart;
@ -824,7 +840,7 @@ anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
struct anv_state *state;
uint32_t alloc_size = anv_state_pool_get_bucket_size(bucket);
int32_t offset;
int64_t offset;
/* Try free list first. */
state = anv_free_list_pop(&pool->buckets[bucket].free_list,
@ -884,14 +900,19 @@ anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
}
uint32_t padding;
offset = anv_fixed_size_state_pool_alloc_new(&pool->buckets[bucket],
&pool->block_pool,
alloc_size,
pool->block_size,
&padding);
VkResult result =
anv_fixed_size_state_pool_alloc_new(&pool->buckets[bucket],
&pool->block_pool,
alloc_size,
pool->block_size,
&offset,
&padding);
if (result != VK_SUCCESS)
return ANV_STATE_NULL;
/* Every time we allocate a new state, add it to the state pool */
uint32_t idx = 0;
UNUSED VkResult result = anv_state_table_add(&pool->table, &idx, 1);
result = anv_state_table_add(&pool->table, &idx, 1);
assert(result == VK_SUCCESS);
state = anv_state_table_get(&pool->table, idx);

View file

@ -692,10 +692,12 @@ VkResult anv_block_pool_init(struct anv_block_pool *pool,
const char *name,
uint64_t start_address,
uint32_t initial_size,
uint64_t max_size);
uint32_t max_size);
void anv_block_pool_finish(struct anv_block_pool *pool);
int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
uint32_t block_size, uint32_t *padding);
VkResult anv_block_pool_alloc(struct anv_block_pool *pool,
uint32_t block_size,
int64_t *offset,
uint32_t *padding);
void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t
size);
@ -704,7 +706,7 @@ struct anv_state_pool_params {
uint64_t base_address;
int64_t start_offset;
uint32_t block_size;
uint64_t max_size;
uint32_t max_size;
};
VkResult anv_state_pool_init(struct anv_state_pool *pool,

View file

@ -301,10 +301,12 @@ if with_tests
'tests/state_pool.c',
'tests/state_pool_free_list_only.c',
'tests/state_pool_max_size.c',
'tests/state_pool_no_free.c',
'tests/state_pool_padding.c',
'tests/block_pool_no_free.c',
'tests/block_pool_grow_first.c',
'tests/block_pool_max_size.c',
)
test(

View file

@ -11,11 +11,14 @@
ANV_C_TEST(StatePool, Regular, state_pool_test);
ANV_C_TEST(StatePool, FreeListOnly, state_pool_free_list_only_test);
ANV_C_TEST(StatePool, MaxSizeOverLimit, state_pool_max_size_over_limit);
ANV_C_TEST(StatePool, MaxSizeWithinLimit, state_pool_max_size_within_limit);
ANV_C_TEST(StatePool, NoFree, state_pool_no_free_test);
ANV_C_TEST(StatePool, Padding, state_pool_padding_test);
ANV_C_TEST(BlockPool, NoFree, block_pool_no_free_test);
ANV_C_TEST(BlockPool, GrowFirst, block_pool_grow_first_test);
ANV_C_TEST(BlockPool, MaxSize, block_pool_max_size);
extern "C" void FAIL_IN_GTEST(const char *file_path, unsigned line_number, const char *msg) {
GTEST_FAIL_AT(file_path, line_number) << msg;

View file

@ -48,7 +48,9 @@ void block_pool_grow_first_test(void)
ASSERT(pool.size == initial_size);
uint32_t padding;
int32_t offset = anv_block_pool_alloc(&pool, block_size, &padding);
int64_t offset;
VkResult result = anv_block_pool_alloc(&pool, block_size, &offset, &padding);
ASSERT(result == VK_SUCCESS);
/* Pool will have grown at least space to fit the new allocation. */
ASSERT(pool.size > initial_size);

View file

@ -0,0 +1,73 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "test_common.h"
void block_pool_max_size(void);

/* Unit test: a block pool created with a 1Mb max_size must hand out exactly
 * 1Mb worth of blocks, never grow past the maximum, and then fail further
 * allocations with VK_ERROR_OUT_OF_DEVICE_MEMORY instead of growing.
 */
void block_pool_max_size(void)
{
   struct anv_physical_device physical_device = {};
   struct anv_device device = {};
   struct anv_block_pool pool;
   const uint32_t block_size = 16 * 1024;
   const uint32_t initial_size = block_size;
   const uint32_t _1Mb = 1024 * 1024;

   /* Minimal device setup using the stub kernel-mode backend so the pool
    * can allocate without a real GPU.
    */
   test_device_info_init(&physical_device.info);
   anv_device_set_physical(&device, &physical_device);
   device.kmd_backend = anv_kmd_backend_get(INTEL_KMD_TYPE_STUB);
   pthread_mutex_init(&device.mutex, NULL);
   anv_bo_cache_init(&device.bo_cache, &device);

   anv_block_pool_init(&pool, &device, "test", 4096, initial_size, _1Mb);
   ASSERT(pool.size == initial_size);

   /* Fill the pool up to exactly max_size; every allocation must succeed. */
   for (uint32_t i = 0; i < _1Mb / block_size; i++) {
      uint32_t padding;
      int64_t offset;
      VkResult result = anv_block_pool_alloc(&pool, block_size, &offset, &padding);
      ASSERT(result == VK_SUCCESS);
      /* The pool may grow to satisfy the allocation, but it must never
       * exceed the configured maximum size.
       */
      ASSERT(pool.size <= _1Mb);
      /* Use the memory to ensure it is valid. */
      void *map = anv_block_pool_map(&pool, offset, block_size);
      memset(map, 22, block_size);
   }

   /* One more block would exceed max_size: the allocation must fail. */
   {
      uint32_t padding;
      int64_t offset;
      VkResult result = anv_block_pool_alloc(&pool, block_size, &offset, &padding);
      ASSERT(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   anv_block_pool_finish(&pool);
   anv_bo_cache_finish(&device.bo_cache);
   pthread_mutex_destroy(&device.mutex);
}

View file

@ -43,11 +43,14 @@ static void *alloc_blocks(void *_job)
struct job *job = _job;
uint32_t job_id = job - jobs;
uint32_t block_size = 16 * ((job_id % 4) + 1);
int32_t block, *data;
int64_t block;
int32_t *data;
for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) {
UNUSED uint32_t padding;
block = anv_block_pool_alloc(job->pool, block_size, &padding);
VkResult result = anv_block_pool_alloc(job->pool, block_size,
&block, &padding);
ASSERT(result == VK_SUCCESS);
data = anv_block_pool_map(job->pool, block, block_size);
*data = block;
ASSERT(block >= 0);

View file

@ -0,0 +1,131 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <pthread.h>
#include "anv_private.h"
#include "test_common.h"
#define NUM_THREADS 16
#define STATES_PER_THREAD 1024
#define NUM_RUNS 1

/* Per-thread job descriptor: each worker thread allocates
 * STATES_PER_THREAD states from the shared pool and records every result
 * (including failed allocations) for the main thread to validate.
 */
static struct job {
   pthread_t thread;
   uint32_t state_size;               /* size of each state allocation */
   uint32_t state_alignment;          /* alignment of each state allocation */
   struct anv_state_pool *pool;       /* shared pool under test */
   struct anv_state states[STATES_PER_THREAD];
} jobs[NUM_THREADS];

/* Start barrier so all workers begin allocating at the same time,
 * maximizing contention on the pool.
 */
static pthread_barrier_t barrier;
/* Worker thread entry point: wait until every thread is ready, then
 * allocate STATES_PER_THREAD states from the shared pool, recording each
 * result (failed allocations included) in the job's states array.
 */
static void *alloc_states(void *_job)
{
   struct job *job = _job;

   /* Release all workers simultaneously to maximize pool contention. */
   pthread_barrier_wait(&barrier);

   unsigned idx = 0;
   while (idx < STATES_PER_THREAD) {
      job->states[idx] = anv_state_pool_alloc(job->pool,
                                              job->state_size,
                                              job->state_alignment);
      idx++;
   }

   return NULL;
}
/* Spawn NUM_THREADS workers hammering one state pool, then verify that
 * exactly the expected number of allocations failed once the pool reached
 * pool_max_size, and that each thread's successful allocations have
 * strictly increasing offsets.
 *
 * Fix: the barrier was initialized but never destroyed. Besides leaking,
 * re-initializing a still-initialized barrier on a later run (NUM_RUNS > 1)
 * is undefined behavior per POSIX, so destroy it once all threads joined.
 */
static void run_test(uint32_t state_size,
                     uint32_t state_alignment,
                     uint32_t block_size,
                     uint32_t pool_max_size)
{
   struct anv_physical_device physical_device = { };
   struct anv_device device = {};
   struct anv_state_pool state_pool;

   /* Minimal device setup using the stub kernel-mode backend. */
   test_device_info_init(&physical_device.info);
   anv_device_set_physical(&device, &physical_device);
   device.kmd_backend = anv_kmd_backend_get(INTEL_KMD_TYPE_STUB);
   pthread_mutex_init(&device.mutex, NULL);
   anv_bo_cache_init(&device.bo_cache, &device);
   anv_state_pool_init(&state_pool, &device,
                       &(struct anv_state_pool_params) {
                          .name = "test",
                          .base_address = 4096,
                          .start_offset = 0,
                          .block_size = block_size,
                          .max_size = pool_max_size,
                       });

   pthread_barrier_init(&barrier, NULL, NUM_THREADS);

   for (unsigned i = 0; i < ARRAY_SIZE(jobs); i++) {
      jobs[i].state_size = state_size;
      jobs[i].state_alignment = state_alignment;
      jobs[i].pool = &state_pool;
      pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]);
   }

   for (unsigned i = 0; i < ARRAY_SIZE(jobs); i++)
      pthread_join(jobs[i].thread, NULL);

   /* All workers are done with the barrier; tear it down so a subsequent
    * run can re-initialize it safely.
    */
   pthread_barrier_destroy(&barrier);

   /* NOTE(review): assumes each state consumes one block_size chunk of the
    * pool, so pool_max_size / block_size allocations succeed before the
    * pool starts failing — holds for state_size <= block_size as used by
    * the callers below.
    */
   const uint32_t expected_allocation_fails =
      (NUM_THREADS * STATES_PER_THREAD * block_size) > pool_max_size ?
      ((NUM_THREADS * STATES_PER_THREAD) - (pool_max_size / block_size)) : 0;
   uint32_t allocation_fails = 0;
   for (unsigned j = 0; j < ARRAY_SIZE(jobs); j++) {
      int64_t last_state_offset = -1;
      for (unsigned s = 0; s < ARRAY_SIZE(jobs[j].states); s++) {
         /* A zero alloc_size marks a failed allocation (ANV_STATE_NULL). */
         if (jobs[j].states[s].alloc_size) {
            /* Within one thread, offsets must strictly increase. */
            ASSERT(last_state_offset < jobs[j].states[s].offset);
            last_state_offset = jobs[j].states[s].offset;
         } else {
            allocation_fails++;
         }
      }
   }

   ASSERT(allocation_fails == expected_allocation_fails);

   anv_state_pool_finish(&state_pool);
   anv_bo_cache_finish(&device.bo_cache);
   pthread_mutex_destroy(&device.mutex);
}
void state_pool_max_size_within_limit(void);

/* The pool's max_size covers exactly NUM_THREADS * STATES_PER_THREAD
 * 64-byte blocks, so no allocation should fail.
 */
void state_pool_max_size_within_limit(void)
{
   unsigned run = 0;
   while (run++ < NUM_RUNS)
      run_test(16, 16, 64, 64 * NUM_THREADS * STATES_PER_THREAD);
}
void state_pool_max_size_over_limit(void);

/* The pool's max_size only covers a quarter of the requested allocations
 * (16 bytes budgeted per state vs 64-byte blocks), so the excess
 * allocations must fail.
 */
void state_pool_max_size_over_limit(void)
{
   unsigned run = 0;
   while (run++ < NUM_RUNS)
      run_test(16, 16, 64, 16 * NUM_THREADS * STATES_PER_THREAD);
}