mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 21:40:08 +01:00
anv: Add VMA allocator for shader binaries
Introduce a VMA-first chunk allocator for shader binaries to eventually replace the anv_state_pool-based implementation. This allocator works directly with GPU virtual addresses through util_vma_heap, making the virtual address space an explicit resource managed by ANV. No functional change in this commit. v2(Michael Cheng): Use existing instruction state pool anv_va_range v3(Lionel): Simplify allocator Signed-off-by: Michael Cheng <michael.cheng@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38869>
This commit is contained in:
parent
20f320b7c7
commit
1fa327ac32
5 changed files with 329 additions and 0 deletions
|
|
@ -1230,6 +1230,56 @@ struct anv_gfx_state_ptr {
|
|||
4 * _cmd_state->len); \
|
||||
} while (0)
|
||||
|
||||
/* Upper bound on the number of BOs backing a single shader heap.  An
 * init-time assert in anv_shader_heap_init() checks that the chunking of
 * the VA range fits in this many entries. */
#define ANV_SHADER_HEAP_MAX_BOS (128)

/* A VMA-first chunk allocator for shader binaries: GPU virtual addresses
 * are handed out by a util_vma_heap covering va_range, and each fixed
 * chunk of the range is lazily backed by a fixed-address BO on first use.
 */
struct anv_shader_heap {
   struct anv_device *device;

   /* GPU virtual address range managed by this heap */
   struct anv_va_range va_range;

   /* log2 of the smallest chunk size */
   uint32_t start_pot_size;
   /* log2 of the largest ("base") chunk size */
   uint32_t base_pot_size;

   /* 1ull << start_pot_size */
   uint64_t start_chunk_size;
   /* 1ull << base_pot_size */
   uint64_t base_chunk_size;

   /* Number of power-of-two "small" chunks at each end of the range
    * (base_pot_size - start_pot_size + 1). */
   uint32_t small_chunk_count;

   /* Per-chunk backing storage.  addr/size are fixed at init time; bo is
    * NULL until the chunk is first touched by an allocation. */
   struct {
      uint64_t addr;
      uint64_t size;

      struct anv_bo *bo;
   } bos[ANV_SHADER_HEAP_MAX_BOS];
   /* Bit i is set once bos[i].bo has been successfully allocated */
   BITSET_DECLARE(allocated_bos, ANV_SHADER_HEAP_MAX_BOS);

   /* VA allocator handing out addresses inside va_range */
   struct util_vma_heap vma;
   /* Protects vma and bos[] */
   simple_mtx_t mutex;
};

/* Result of anv_shader_heap_alloc().  offset is relative to
 * va_range.addr; alloc_size == 0 signals a failed allocation. */
struct anv_shader_alloc {
   uint64_t offset;
   uint64_t alloc_size;
};
|
||||
|
||||
/* Initialize @heap over @va_range, with chunk sizes ranging from
 * 1 << start_pot_size up to 1 << base_pot_size.  No BOs are allocated
 * here; backing is created lazily by anv_shader_heap_alloc(). */
VkResult anv_shader_heap_init(struct anv_shader_heap *heap,
                              struct anv_device *device,
                              struct anv_va_range va_range,
                              uint32_t start_pot_size,
                              uint32_t base_pot_size);
/* Release all backing BOs and tear down the heap. */
void anv_shader_heap_finish(struct anv_shader_heap *heap);

/* Allocate @size bytes (aligned to @align) of VA from the heap,
 * creating backing BOs as needed.  If @requested_addr is non-zero that
 * exact address is used (replay path).  Returns alloc_size == 0 on
 * failure. */
struct anv_shader_alloc anv_shader_heap_alloc(struct anv_shader_heap *heap,
                                              uint64_t size,
                                              uint64_t align,
                                              bool capture_replay,
                                              uint64_t requested_addr);
/* Return @alloc's VA range to the heap (backing BOs are not freed). */
void anv_shader_heap_free(struct anv_shader_heap *heap, struct anv_shader_alloc alloc);

/* Copy @size bytes of @data into the BO(s) backing @alloc. */
void anv_shader_heap_upload(struct anv_shader_heap *heap,
                            struct anv_shader_alloc alloc,
                            const void *data, uint64_t size);
|
||||
|
||||
struct anv_shader {
|
||||
struct vk_shader vk;
|
||||
|
||||
|
|
@ -2463,6 +2513,8 @@ struct anv_device {
|
|||
struct util_vma_heap vma_dynamic_visible;
|
||||
struct util_vma_heap vma_trtt;
|
||||
|
||||
struct anv_shader_heap shader_heap;
|
||||
|
||||
/** List of all anv_device_memory objects */
|
||||
struct list_head memory_objects;
|
||||
|
||||
|
|
|
|||
209
src/intel/vulkan/anv_shader_heap.c
Normal file
209
src/intel/vulkan/anv_shader_heap.c
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
/* Copyright © 2025 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "anv_private.h"
|
||||
|
||||
static inline uint32_t
|
||||
shader_bo_index(struct anv_shader_heap *heap, uint64_t addr)
|
||||
{
|
||||
uint64_t alloc_offset = addr - heap->va_range.addr;
|
||||
|
||||
unsigned b;
|
||||
if (alloc_offset < heap->base_chunk_size) {
|
||||
b = alloc_offset < heap->start_chunk_size ? 0 :
|
||||
(util_last_bit64(alloc_offset) - heap->start_pot_size);
|
||||
assert(b < heap->small_chunk_count);
|
||||
return b;
|
||||
} else if (alloc_offset >= (heap->va_range.size - heap->base_chunk_size)) {
|
||||
alloc_offset = heap->va_range.size - alloc_offset - 1;
|
||||
b = alloc_offset < heap->start_chunk_size ? 0 :
|
||||
(util_last_bit64(alloc_offset) - heap->start_pot_size);
|
||||
assert(b < heap->small_chunk_count);
|
||||
b = heap->small_chunk_count + b;
|
||||
} else {
|
||||
b = 2 * heap->small_chunk_count +
|
||||
(alloc_offset / heap->base_chunk_size) - 1;
|
||||
}
|
||||
|
||||
assert(addr >= heap->bos[b].addr &&
|
||||
addr < (heap->bos[b].addr + heap->bos[b].size));
|
||||
|
||||
return b;
|
||||
}
|
||||
|
||||
/* Initialize @heap over @va_range.
 *
 * The VA range is split into chunks: a run of power-of-two "small"
 * chunks growing from 1 << start_pot_size up to 1 << base_pot_size at
 * the low end, a mirrored run at the high end, and fixed base-size
 * chunks in between.  Only the chunk addresses/sizes are computed here;
 * backing BOs are created lazily in anv_shader_heap_alloc().
 *
 * Always returns VK_SUCCESS.
 */
VkResult
anv_shader_heap_init(struct anv_shader_heap *heap,
                     struct anv_device *device,
                     struct anv_va_range va_range,
                     uint32_t start_pot_size,
                     uint32_t base_pot_size)
{
   /* Smallest chunk must satisfy the HW memory alignment. */
   assert((1ull << start_pot_size) >= device->info->mem_alignment);
   assert(base_pot_size >= start_pot_size);
   assert(va_range.size % (1ull << base_pot_size) == 0);
   /* The resulting chunk count must fit in bos[]. */
   assert((DIV_ROUND_UP(va_range.size, (1ull << base_pot_size)) -
           (base_pot_size - start_pot_size) - 1) <
          ARRAY_SIZE(heap->bos));

   memset(heap, 0, sizeof(*heap));

   heap->start_pot_size = start_pot_size;
   heap->base_pot_size = base_pot_size;
   heap->start_chunk_size = 1ull << start_pot_size;
   heap->base_chunk_size = 1ull << base_pot_size;
   heap->small_chunk_count = base_pot_size - start_pot_size + 1;
   heap->device = device;
   heap->va_range = va_range;

   /* Small chunks: bos[i] at the low end and bos[small_chunk_count + i]
    * mirrored at the high end share the same size.  Index 0 covers
    * [0, start_chunk); index i > 0 covers the next power-of-two span
    * [2^(start_pot+i-1), 2^(start_pot+i)). */
   for (uint32_t i = 0; i < heap->small_chunk_count; i++) {
      heap->bos[i].size =
         heap->bos[heap->small_chunk_count + i].size =
         1ull << (i == 0 ? start_pot_size : (start_pot_size + i - 1));


      heap->bos[i].addr = heap->va_range.addr +
         (i == 0 ? 0 : (1ull << (start_pot_size + i - 1)));
      heap->bos[heap->small_chunk_count + i].addr =
         heap->va_range.addr + heap->va_range.size -
         (1ull << (start_pot_size + i));
   }

   /* Middle of the range: fixed base-size chunks between the two
    * base_chunk_size spans claimed by the small chunks above. */
   const uint64_t base_chunks_size =
      heap->va_range.size - 2 * heap->base_chunk_size;
   for (uint32_t i = 0; i < base_chunks_size / heap->base_chunk_size; i++) {
      heap->bos[2 * heap->small_chunk_count + i].addr =
         heap->va_range.addr + heap->base_chunk_size + i * heap->base_chunk_size;
      heap->bos[2 * heap->small_chunk_count + i].size = heap->base_chunk_size;
   }

   simple_mtx_init(&heap->mutex, mtx_plain);
   /* NOTE(review): the VMA heap is sized 64 bytes short of the range —
    * presumably to keep allocations away from the very top of the
    * range; TODO confirm the intent. */
   util_vma_heap_init(&heap->vma, va_range.addr, va_range.size - 64);

   BITSET_ZERO(heap->allocated_bos);

   return VK_SUCCESS;
}
|
||||
|
||||
void
|
||||
anv_shader_heap_finish(struct anv_shader_heap *heap)
|
||||
{
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(heap->bos); i++) {
|
||||
if (heap->bos[i].bo) {
|
||||
ANV_DMR_BO_FREE(&heap->device->vk.base, heap->bos[i].bo);
|
||||
anv_device_release_bo(heap->device, heap->bos[i].bo);
|
||||
heap->bos[i].bo = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
util_vma_heap_finish(&heap->vma);
|
||||
simple_mtx_destroy(&heap->mutex);
|
||||
}
|
||||
|
||||
/* Allocate @size bytes of VA (aligned to @align) out of the heap and
 * make sure every chunk the range touches has a backing BO.
 *
 * If @requested_addr is non-zero, that exact address is claimed
 * (capture/replay path).  Otherwise capture_replay allocations grow
 * from the bottom of the range and regular ones from the top, keeping
 * the two populations apart.
 *
 * Returns an alloc with alloc_size == 0 on failure (VA exhausted or a
 * backing BO could not be created).
 */
struct anv_shader_alloc
anv_shader_heap_alloc(struct anv_shader_heap *heap,
                      uint64_t size,
                      uint64_t align,
                      bool capture_replay,
                      uint64_t requested_addr)
{
   /* A single allocation never exceeds one base chunk. */
   assert(align <= heap->base_chunk_size);
   assert(size <= heap->base_chunk_size);

   simple_mtx_lock(&heap->mutex);

   /* Forbid the VMA from handing out a range spanning a
    * (1 << nospan_shift) boundary: at least 2MB, bumped up so the
    * requested size itself can fit between boundaries.  This limits how
    * many backing chunks one allocation can straddle. */
   heap->vma.nospan_shift = MAX2(21, util_last_bit64(size) - 1);
   if ((1ull << heap->vma.nospan_shift) < size)
      heap->vma.nospan_shift++;

   uint64_t addr = 0;
   if (requested_addr) {
      /* Replay: claim the exact address or fail. */
      if (util_vma_heap_alloc_addr(&heap->vma,
                                   requested_addr, size)) {
         addr = requested_addr;
      }
   } else {
      if (capture_replay) {
         /* Capture/replay allocations go low so replays can re-claim
          * the same addresses. */
         heap->vma.alloc_high = false;
         addr = util_vma_heap_alloc(&heap->vma, size, align);
      } else {
         heap->vma.alloc_high = true;
         addr = util_vma_heap_alloc(&heap->vma, size, align);
      }
   }

   struct anv_shader_alloc alloc = {};

   if (addr != 0) {
      /* Lazily create backing BOs for every chunk the range touches. */
      const uint32_t bo_begin_idx = shader_bo_index(heap, addr);
      const uint32_t bo_end_idx = shader_bo_index(heap, addr + size - 1);
      for (uint32_t i = MIN2(bo_begin_idx, bo_end_idx);
           i <= MAX2(bo_begin_idx, bo_end_idx); i++) {
         if (heap->bos[i].bo != NULL)
            continue;

         VkResult result =
            anv_device_alloc_bo(heap->device, "shaders",
                                heap->bos[i].size,
                                ANV_BO_ALLOC_FIXED_ADDRESS |
                                ANV_BO_ALLOC_MAPPED |
                                ANV_BO_ALLOC_HOST_CACHED_COHERENT |
                                ANV_BO_ALLOC_CAPTURE |
                                ANV_BO_ALLOC_INTERNAL,
                                heap->bos[i].addr,
                                &heap->bos[i].bo);
         ANV_DMR_BO_ALLOC(&heap->device->vk.base, heap->bos[i].bo, result);
         if (result == VK_SUCCESS)
            BITSET_SET(heap->allocated_bos, i);
         else {
            /* Give the VA back; BOs created earlier in this loop are
             * kept and will serve future allocations. */
            util_vma_heap_free(&heap->vma, addr, size);
            addr = 0;
            break;
         }
      }

      if (addr != 0) {
         alloc.offset = addr - heap->va_range.addr;
         alloc.alloc_size = size;
      }
   }

   simple_mtx_unlock(&heap->mutex);

   return alloc;
}
|
||||
|
||||
void
|
||||
anv_shader_heap_free(struct anv_shader_heap *heap, struct anv_shader_alloc alloc)
|
||||
{
|
||||
simple_mtx_lock(&heap->mutex);
|
||||
|
||||
util_vma_heap_free(&heap->vma, heap->va_range.addr + alloc.offset,
|
||||
alloc.alloc_size);
|
||||
|
||||
simple_mtx_unlock(&heap->mutex);
|
||||
}
|
||||
|
||||
void
|
||||
anv_shader_heap_upload(struct anv_shader_heap *heap,
|
||||
struct anv_shader_alloc alloc,
|
||||
const void *data, uint64_t data_size)
|
||||
{
|
||||
const uint32_t bo_begin_idx = shader_bo_index(
|
||||
heap, heap->va_range.addr + alloc.offset);
|
||||
const uint32_t bo_end_idx = shader_bo_index(
|
||||
heap, heap->va_range.addr + alloc.offset + data_size - 1);
|
||||
|
||||
const uint64_t upload_addr = heap->va_range.addr + alloc.offset;
|
||||
for (uint32_t i = MIN2(bo_begin_idx, bo_end_idx);
|
||||
i <= MAX2(bo_begin_idx, bo_end_idx); i++) {
|
||||
const uint64_t bo_offset =
|
||||
MAX2(upload_addr, heap->bos[i].addr) - heap->bos[i].addr;
|
||||
const uint32_t data_offset =
|
||||
upload_addr - (heap->bos[i].addr + bo_offset);
|
||||
const uint64_t copy_size =
|
||||
MIN2(heap->bos[i].size - bo_offset, data_size - data_offset);
|
||||
|
||||
memcpy(heap->bos[i].bo->map + bo_offset, data, copy_size);
|
||||
}
|
||||
}
|
||||
|
|
@ -192,6 +192,7 @@ libanv_files = files(
|
|||
'anv_sampler.c',
|
||||
'anv_shader.c',
|
||||
'anv_shader_compile.c',
|
||||
'anv_shader_heap.c',
|
||||
'anv_slab_bo.c',
|
||||
'anv_slab_bo.h',
|
||||
'anv_sparse.c',
|
||||
|
|
@ -328,6 +329,7 @@ if with_tests
|
|||
'tests/block_pool_no_free.c',
|
||||
'tests/block_pool_grow_first.c',
|
||||
'tests/block_pool_max_size.c',
|
||||
'tests/shader_heap_small_allocs.c',
|
||||
)
|
||||
|
||||
test(
|
||||
|
|
|
|||
|
|
@ -20,6 +20,9 @@ ANV_C_TEST(BlockPool, NoFree, block_pool_no_free_test);
|
|||
ANV_C_TEST(BlockPool, GrowFirst, block_pool_grow_first_test);
|
||||
ANV_C_TEST(BlockPool, MaxSize, block_pool_max_size);
|
||||
|
||||
ANV_C_TEST(ShaderHeap, SmallAllocsLow, shader_heap_small_allocs_lo);
|
||||
ANV_C_TEST(ShaderHeap, SmallAllocsHigh, shader_heap_small_allocs_hi);
|
||||
|
||||
/* C-linkage bridge so plain-C test code can report a failure at a given
 * source location through the gtest harness. */
extern "C" void FAIL_IN_GTEST(const char *file_path, unsigned line_number, const char *msg) {
    GTEST_FAIL_AT(file_path, line_number) << msg;
}
|
||||
|
|
|
|||
63
src/intel/vulkan/tests/shader_heap_small_allocs.c
Normal file
63
src/intel/vulkan/tests/shader_heap_small_allocs.c
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Copyright © 2025 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "anv_private.h"
|
||||
#include "test_common.h"
|
||||
|
||||
void shader_heap_small_allocs_lo(void);
|
||||
void shader_heap_small_allocs_hi(void);
|
||||
|
||||
static void shader_heap_small_allocs(bool high)
|
||||
{
|
||||
struct anv_physical_device physical_device = {};
|
||||
struct anv_device device = {};
|
||||
struct anv_shader_heap heap;
|
||||
|
||||
test_device_info_init(&physical_device.info);
|
||||
device.vk.base.device = &device.vk;
|
||||
anv_device_set_physical(&device, &physical_device);
|
||||
device.kmd_backend = anv_kmd_backend_get(INTEL_KMD_TYPE_STUB);
|
||||
pthread_mutex_init(&device.mutex, NULL);
|
||||
anv_bo_cache_init(&device.bo_cache, &device);
|
||||
anv_shader_heap_init(&heap, &device,
|
||||
(struct anv_va_range) {
|
||||
.addr = 3ull * 1024 * 1024 * 1024,
|
||||
.size = 1ull * 1024 * 1024 * 1024,
|
||||
}, 21, 27);
|
||||
|
||||
uint32_t sizes[] = {
|
||||
64,
|
||||
3 * 64,
|
||||
12 * 64,
|
||||
16 * 64,
|
||||
233 * 64,
|
||||
1025 * 64,
|
||||
6 * 4096 + 64,
|
||||
2 * 1024 * 1024,
|
||||
4 * 1024 * 1024,
|
||||
2 * 1024 * 1024 + 2048,
|
||||
16 * 1024 * 1024 + 1024,
|
||||
};
|
||||
struct anv_shader_alloc allocs[ARRAY_SIZE(sizes)];
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(sizes); i++) {
|
||||
allocs[i] = anv_shader_heap_alloc(&heap, sizes[i], 64, high, 0);
|
||||
assert(allocs[i].alloc_size != 0);
|
||||
}
|
||||
|
||||
anv_shader_heap_finish(&heap);
|
||||
anv_bo_cache_finish(&device.bo_cache);
|
||||
pthread_mutex_destroy(&device.mutex);
|
||||
}
|
||||
|
||||
void shader_heap_small_allocs_hi()
|
||||
{
|
||||
shader_heap_small_allocs(true);
|
||||
}
|
||||
|
||||
void shader_heap_small_allocs_lo()
|
||||
{
|
||||
shader_heap_small_allocs(false);
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue