anv: setup the TR-TT vma heap

"16TB ought to be enough for anybody."
      - Probably some Intel graphics hardware engineer

TR-TT addresses are fixed regardless of the platform's gtt_size.
Unconditionally reserve this space for it: our total 48-bit address
space is 256TB and TR-TT takes 16TB out of it (1/16th).

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26036>
This commit is contained in:
Paulo Zanoni 2023-10-16 16:27:57 -07:00 committed by Marge Bot
parent 0a120edfb8
commit c2db19f496
3 changed files with 49 additions and 5 deletions

View file

@ -3212,6 +3212,10 @@ VkResult anv_CreateDevice(
device->physical->va.direct_descriptor_pool.size);
}
util_vma_heap_init(&device->vma_trtt,
device->physical->va.trtt.addr,
device->physical->va.trtt.size);
list_inithead(&device->memory_objects);
list_inithead(&device->image_private_objects);
@ -3666,6 +3670,7 @@ VkResult anv_CreateDevice(
fail_mutex:
pthread_mutex_destroy(&device->mutex);
fail_vmas:
util_vma_heap_finish(&device->vma_trtt);
util_vma_heap_finish(&device->vma_desc);
util_vma_heap_finish(&device->vma_hi);
util_vma_heap_finish(&device->vma_lo);
@ -3777,6 +3782,7 @@ void anv_DestroyDevice(
anv_bo_cache_finish(&device->bo_cache);
util_vma_heap_finish(&device->vma_trtt);
util_vma_heap_finish(&device->vma_desc);
util_vma_heap_finish(&device->vma_hi);
util_vma_heap_finish(&device->vma_lo);
@ -3835,6 +3841,9 @@ static struct util_vma_heap *
anv_vma_heap_for_flags(struct anv_device *device,
enum anv_bo_alloc_flags alloc_flags)
{
if (alloc_flags & ANV_BO_ALLOC_TRTT)
return &device->vma_trtt;
if (alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS)
return &device->vma_lo;
@ -3857,7 +3866,8 @@ anv_vma_alloc(struct anv_device *device,
*out_vma_heap = anv_vma_heap_for_flags(device, alloc_flags);
if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
assert(*out_vma_heap == &device->vma_hi);
assert(*out_vma_heap == &device->vma_hi ||
*out_vma_heap == &device->vma_trtt);
if (client_address) {
if (util_vma_heap_alloc_addr(*out_vma_heap,
@ -3891,7 +3901,8 @@ anv_vma_free(struct anv_device *device,
{
assert(vma_heap == &device->vma_lo ||
vma_heap == &device->vma_hi ||
vma_heap == &device->vma_desc);
vma_heap == &device->vma_desc ||
vma_heap == &device->vma_trtt);
const uint64_t addr_48b = intel_48b_address(address);

View file

@ -400,6 +400,12 @@ enum anv_bo_alloc_flags {
/** For descriptor pools */
ANV_BO_ALLOC_DESCRIPTOR_POOL = (1 << 13),
/** For buffers that will be bound using TR-TT.
*
* Not for buffers used as the TR-TT page tables.
*/
ANV_BO_ALLOC_TRTT = (1 << 14),
};
struct anv_bo {
@ -1009,6 +1015,7 @@ struct anv_physical_device {
* Client heap
*/
struct anv_va_range high_heap;
struct anv_va_range trtt;
} va;
/* Either we have a single vram region and it's all mappable, or we have
@ -1552,6 +1559,7 @@ struct anv_device {
struct util_vma_heap vma_lo;
struct util_vma_heap vma_hi;
struct util_vma_heap vma_desc;
struct util_vma_heap vma_trtt;
/** List of all anv_device_memory objects */
struct list_head memory_objects;

View file

@ -62,6 +62,7 @@ anv_device_print_vas(struct anv_physical_device *device)
PRINT_HEAP(indirect_push_descriptor_pool);
PRINT_HEAP(instruction_state_pool);
PRINT_HEAP(high_heap);
PRINT_HEAP(trtt);
}
void
@ -153,12 +154,36 @@ anv_physical_device_init_va_ranges(struct anv_physical_device *device)
address = align64(address, _4Gb);
address = va_add(&device->va.instruction_state_pool, address, 2 * _1Gb);
/* Leave the last 4GiB out of the high vma range, so that no state
* base address + size can overflow 48 bits. For more information see
/* What's left to do for us is to set va.high_heap and va.trtt without
* overlap, but there are a few things to be considered:
*
* The TR-TT address space is governed by the GFX_TRTT_VA_RANGE register,
* which carves out part of the address space for TR-TT and is independent
* of device->gtt_size. We use bits 47:44 for gen9+; the values we set here
* should be in sync with what we write to the register.
*
* If we ever gain the capability to use more than 48 bits of address space
* we'll have to adjust where we put the TR-TT space (and how we set
* GFX_TRTT_VA_RANGE).
*
* We have to leave the last 4GiB out of the high vma range, so that no
* state base address + size can overflow 48 bits. For more information see
* the comment about Wa32bitGeneralStateOffset in anv_allocator.c
*
* Despite the comment above, before we had TR-TT we were not only avoiding
* the last 4GiB of the 48-bit address space, but also avoiding the last
* 4GiB from gtt_size, so let's be on the safe side and leave out the last
* 4GiB at both the top of the TR-TT space and the top of the gtt.
*/
uint64_t user_heaps_size = device->gtt_size - address - 4 * _1Gb;
assert(device->gtt_size <= (1uLL << 48));
uint64_t trtt_start = 0xFuLL << 44;
uint64_t trtt_end = (1uLL << 48) - 4 * _1Gb;
uint64_t addressable_top = MIN2(device->gtt_size, trtt_start) - 4 * _1Gb;
uint64_t user_heaps_size = addressable_top - address;
address = va_add(&device->va.high_heap, address, user_heaps_size);
assert(address <= trtt_start);
address = va_add(&device->va.trtt, trtt_start, trtt_end - trtt_start);
if (INTEL_DEBUG(DEBUG_HEAPS))
anv_device_print_vas(device);