winsys/amdgpu-radeon: rework how we describe heaps

It was getting difficult to add more heaps.

This adds more heaps because more flag combinations are legal now.
Invalid flag combinations are also handled better.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16466>
This commit is contained in:
Marek Olšák 2022-05-10 23:29:20 -04:00
parent ceddd7d49a
commit 593f72aa21
5 changed files with 114 additions and 178 deletions

View file

@ -708,186 +708,137 @@ radeon_bo_reference(struct radeon_winsys *rws, struct pb_buffer **dst, struct pb
pb_reference_with_winsys(rws, dst, src); pb_reference_with_winsys(rws, dst, src);
} }
/* The following bits describe the heaps managed by slab allocators (pb_slab) and
 * the allocation cache (pb_cache).
 *
 * The heap index is a bitfield rather than an enum: every legal combination of
 * the bits below names one heap, which makes adding new flag combinations
 * trivial.  Bit 3 is deliberately shared between NO_CPU_ACCESS and WC:
 * NO_CPU_ACCESS is only meaningful for VRAM heaps and WC is only meaningful
 * for GTT heaps, so the same bit encodes both without ambiguity.
 */
#define RADEON_HEAP_BIT_VRAM          (1 << 0) /* if false, it's GTT */
#define RADEON_HEAP_BIT_READ_ONLY     (1 << 1) /* both VRAM and GTT */
#define RADEON_HEAP_BIT_32BIT         (1 << 2) /* both VRAM and GTT */
#define RADEON_HEAP_BIT_NO_CPU_ACCESS (1 << 3) /* VRAM only */
#define RADEON_HEAP_BIT_WC            (1 << 3) /* GTT only, VRAM implies this to be true */
#define RADEON_HEAP_BIT_GL2_BYPASS    (1 << 4) /* GTT only */

/* The number of all possible heap descriptions using the bits above. */
#define RADEON_NUM_HEAPS              (1 << 5)
/* Return the memory domain described by a heap index.
 *
 * Only the VRAM bit matters here: a heap is either a VRAM heap or a GTT heap
 * (RADEON_HEAP_BIT_VRAM clear means GTT).  The heap must be a valid,
 * non-negative index as produced by radeon_get_heap_index().
 */
static inline enum radeon_bo_domain radeon_domain_from_heap(int heap)
{
   assert(heap >= 0);

   if (heap & RADEON_HEAP_BIT_VRAM)
      return RADEON_DOMAIN_VRAM;
   else
      return RADEON_DOMAIN_GTT;
}
static inline unsigned radeon_flags_from_heap(enum radeon_heap heap) static inline unsigned radeon_flags_from_heap(int heap)
{ {
assert(heap >= 0);
unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING; unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING;
switch (heap) { if (heap & RADEON_HEAP_BIT_READ_ONLY)
case RADEON_HEAP_GTT:
case RADEON_HEAP_GTT_GL2_BYPASS:
break;
default:
flags |= RADEON_FLAG_GTT_WC;
}
switch (heap) {
case RADEON_HEAP_GTT_GL2_BYPASS_WC:
case RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY:
case RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY_32BIT:
case RADEON_HEAP_GTT_GL2_BYPASS_WC_32BIT:
case RADEON_HEAP_GTT_GL2_BYPASS:
flags |= RADEON_FLAG_GL2_BYPASS;
break;
default:
break;
}
switch (heap) {
case RADEON_HEAP_VRAM_READ_ONLY:
case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
case RADEON_HEAP_GTT_WC_READ_ONLY:
case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
case RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY:
case RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY_32BIT:
flags |= RADEON_FLAG_READ_ONLY; flags |= RADEON_FLAG_READ_ONLY;
break; if (heap & RADEON_HEAP_BIT_32BIT)
default:
break;
}
switch (heap) {
case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
case RADEON_HEAP_VRAM_32BIT:
case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
case RADEON_HEAP_GTT_WC_32BIT:
case RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY_32BIT:
case RADEON_HEAP_GTT_GL2_BYPASS_WC_32BIT:
flags |= RADEON_FLAG_32BIT; flags |= RADEON_FLAG_32BIT;
FALLTHROUGH;
default:
break;
}
switch (heap) { if (heap & RADEON_HEAP_BIT_VRAM) {
case RADEON_HEAP_VRAM_NO_CPU_ACCESS: flags |= RADEON_FLAG_GTT_WC;
flags |= RADEON_FLAG_NO_CPU_ACCESS; if (heap & RADEON_HEAP_BIT_NO_CPU_ACCESS)
break; flags |= RADEON_FLAG_NO_CPU_ACCESS;
default: } else {
break; /* GTT only */
if (heap & RADEON_HEAP_BIT_WC)
flags |= RADEON_FLAG_GTT_WC;
if (heap & RADEON_HEAP_BIT_GL2_BYPASS)
flags |= RADEON_FLAG_GL2_BYPASS;
} }
return flags; return flags;
} }
/* Normalize a (domain, flags) pair in place, so that downstream code can
 * safely assume no invalid flag combinations are present.
 *
 * Rules applied:
 *  - exactly one domain bit survives (lowest set bit wins; default VRAM);
 *  - VRAM forces WC on and GL2_BYPASS off;
 *  - GTT cannot have NO_CPU_ACCESS;
 *  - GDS/OA are never suballocated, never CPU-mapped, never sparse;
 *  - sparse buffers always imply NO_CPU_ACCESS.
 */
static void radeon_canonicalize_bo_flags(enum radeon_bo_domain *_domain,
                                         enum radeon_bo_flag *_flags)
{
   unsigned dom = *_domain;
   unsigned f = *_flags;

   /* Keep only one domain bit, e.g. ignore GTT if VRAM is set. */
   dom = dom ? BITFIELD_BIT(ffs(dom) - 1) : RADEON_DOMAIN_VRAM;

   if (dom == RADEON_DOMAIN_VRAM) {
      f |= RADEON_FLAG_GTT_WC;
      f &= ~RADEON_FLAG_GL2_BYPASS;
   } else if (dom == RADEON_DOMAIN_GTT) {
      f &= ~RADEON_FLAG_NO_CPU_ACCESS;
   } else if (dom == RADEON_DOMAIN_GDS || dom == RADEON_DOMAIN_OA) {
      f |= RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_NO_CPU_ACCESS;
      f &= ~RADEON_FLAG_SPARSE;
   }

   /* Sparse buffers must have NO_CPU_ACCESS set. */
   if (f & RADEON_FLAG_SPARSE)
      f |= RADEON_FLAG_NO_CPU_ACCESS;

   *_domain = (enum radeon_bo_domain)dom;
   *_flags = (enum radeon_bo_flag)f;
}
/* Return the heap index for winsys allocators, or -1 on failure. */ /* Return the heap index for winsys allocators, or -1 on failure. */
static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeon_bo_flag flags) static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeon_bo_flag flags)
{ {
bool uncached; radeon_canonicalize_bo_flags(&domain, &flags);
/* VRAM implies WC (write combining) */
assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
/* NO_CPU_ACCESS implies VRAM only. */
assert(!(flags & RADEON_FLAG_NO_CPU_ACCESS) || domain == RADEON_DOMAIN_VRAM);
/* Resources with interprocess sharing don't use any winsys allocators. */ /* Resources with interprocess sharing don't use any winsys allocators. */
if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING)) if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING))
return -1; return -1;
/* Unsupported flags: NO_SUBALLOC, SPARSE. */ /* These are unsupported flags. */
if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GL2_BYPASS | /* RADEON_FLAG_DRIVER_INTERNAL is ignored. It doesn't affect allocators. */
RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT | /* TODO: handle ENCRYPTED better */
RADEON_FLAG_DRIVER_INTERNAL)) if (flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE | RADEON_FLAG_ENCRYPTED))
return -1; return -1;
switch (domain) { int heap = 0;
case RADEON_DOMAIN_VRAM:
switch (flags & (RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) {
case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY:
assert(!"NO_CPU_ACCESS | READ_ONLY doesn't make sense");
return -1;
case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_32BIT:
assert(!"NO_CPU_ACCESS with 32BIT is disallowed");
return -1;
case RADEON_FLAG_NO_CPU_ACCESS:
return RADEON_HEAP_VRAM_NO_CPU_ACCESS;
case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
return RADEON_HEAP_VRAM_READ_ONLY_32BIT;
case RADEON_FLAG_READ_ONLY:
return RADEON_HEAP_VRAM_READ_ONLY;
case RADEON_FLAG_32BIT:
return RADEON_HEAP_VRAM_32BIT;
case 0:
return RADEON_HEAP_VRAM;
}
break;
case RADEON_DOMAIN_GTT:
uncached = flags & RADEON_FLAG_GL2_BYPASS;
switch (flags & (RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) { if (flags & RADEON_FLAG_READ_ONLY)
case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: heap |= RADEON_HEAP_BIT_READ_ONLY;
return uncached ? RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY_32BIT if (flags & RADEON_FLAG_32BIT)
: RADEON_HEAP_GTT_WC_READ_ONLY_32BIT; heap |= RADEON_HEAP_BIT_32BIT;
case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY:
return uncached ? RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY if (domain == RADEON_DOMAIN_VRAM) {
: RADEON_HEAP_GTT_WC_READ_ONLY; /* VRAM | GTT shouldn't occur, but if it does, ignore GTT. */
case RADEON_FLAG_GTT_WC | RADEON_FLAG_32BIT: heap |= RADEON_HEAP_BIT_VRAM;
return uncached ? RADEON_HEAP_GTT_GL2_BYPASS_WC_32BIT if (flags & RADEON_FLAG_NO_CPU_ACCESS)
: RADEON_HEAP_GTT_WC_32BIT; heap |= RADEON_HEAP_BIT_NO_CPU_ACCESS;
case RADEON_FLAG_GTT_WC: /* RADEON_FLAG_WC is ignored and implied to be true for VRAM */
return uncached ? RADEON_HEAP_GTT_GL2_BYPASS_WC : RADEON_HEAP_GTT_WC; /* RADEON_FLAG_GL2_BYPASS is ignored and implied to be false for VRAM */
case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: } else if (domain == RADEON_DOMAIN_GTT) {
case RADEON_FLAG_READ_ONLY: /* GTT is implied by RADEON_HEAP_BIT_VRAM not being set. */
assert(!"READ_ONLY without WC is disallowed"); if (flags & RADEON_FLAG_GTT_WC)
return -1; heap |= RADEON_HEAP_BIT_WC;
case RADEON_FLAG_32BIT: if (flags & RADEON_FLAG_GL2_BYPASS)
assert(!"32BIT without WC is disallowed"); heap |= RADEON_HEAP_BIT_GL2_BYPASS;
return -1; /* RADEON_FLAG_NO_CPU_ACCESS is ignored and implied to be false for GTT */
case 0: } else {
return uncached ? RADEON_HEAP_GTT_GL2_BYPASS : RADEON_HEAP_GTT; return -1; /* */
}
break;
default:
break;
} }
return -1;
assert(heap < RADEON_NUM_HEAPS);
return heap;
} }
typedef struct pipe_screen *(*radeon_screen_create_t)(struct radeon_winsys *, typedef struct pipe_screen *(*radeon_screen_create_t)(struct radeon_winsys *,

View file

@ -1362,17 +1362,7 @@ amdgpu_bo_create(struct amdgpu_winsys *ws,
struct amdgpu_winsys_bo *bo; struct amdgpu_winsys_bo *bo;
int heap = -1; int heap = -1;
if (domain & (RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA)) radeon_canonicalize_bo_flags(&domain, &flags);
flags |= RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_SUBALLOC;
/* VRAM implies WC. This is not optional. */
assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
/* NO_CPU_ACCESS is not valid with GTT. */
assert(!(domain & RADEON_DOMAIN_GTT) || !(flags & RADEON_FLAG_NO_CPU_ACCESS));
/* Sparse buffers must have NO_CPU_ACCESS set. */
assert(!(flags & RADEON_FLAG_SPARSE) || flags & RADEON_FLAG_NO_CPU_ACCESS);
struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) ? struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) ?
ws->bo_slabs_encrypted : ws->bo_slabs; ws->bo_slabs_encrypted : ws->bo_slabs;
@ -1385,7 +1375,7 @@ amdgpu_bo_create(struct amdgpu_winsys *ws,
struct pb_slab_entry *entry; struct pb_slab_entry *entry;
int heap = radeon_get_heap_index(domain, flags); int heap = radeon_get_heap_index(domain, flags);
if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS) if (heap < 0 || heap >= RADEON_NUM_HEAPS)
goto no_slab; goto no_slab;
unsigned alloc_size = size; unsigned alloc_size = size;
@ -1457,7 +1447,7 @@ no_slab:
if (use_reusable_pool) { if (use_reusable_pool) {
heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_ENCRYPTED); heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_ENCRYPTED);
assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS); assert(heap >= 0 && heap < RADEON_NUM_HEAPS);
/* Get a buffer from the cache. */ /* Get a buffer from the cache. */
bo = (struct amdgpu_winsys_bo*) bo = (struct amdgpu_winsys_bo*)

View file

@ -451,7 +451,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
goto fail_alloc; goto fail_alloc;
/* Create managers. */ /* Create managers. */
pb_cache_init(&aws->bo_cache, RADEON_MAX_CACHED_HEAPS, pb_cache_init(&aws->bo_cache, RADEON_NUM_HEAPS,
500000, aws->check_vm ? 1.0f : 2.0f, 0, 500000, aws->check_vm ? 1.0f : 2.0f, 0,
(aws->info.vram_size + aws->info.gart_size) / 8, aws, (aws->info.vram_size + aws->info.gart_size) / 8, aws,
/* Cast to void* because one of the function parameters /* Cast to void* because one of the function parameters
@ -471,7 +471,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
if (!pb_slabs_init(&aws->bo_slabs[i], if (!pb_slabs_init(&aws->bo_slabs[i],
min_order, max_order, min_order, max_order,
RADEON_MAX_SLAB_HEAPS, true, RADEON_NUM_HEAPS, true,
aws, aws,
amdgpu_bo_can_reclaim_slab, amdgpu_bo_can_reclaim_slab,
amdgpu_bo_slab_alloc_normal, amdgpu_bo_slab_alloc_normal,
@ -486,7 +486,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
if (aws->info.has_tmz_support && if (aws->info.has_tmz_support &&
!pb_slabs_init(&aws->bo_slabs_encrypted[i], !pb_slabs_init(&aws->bo_slabs_encrypted[i],
min_order, max_order, min_order, max_order,
RADEON_MAX_SLAB_HEAPS, true, RADEON_NUM_HEAPS, true,
aws, aws,
amdgpu_bo_can_reclaim_slab, amdgpu_bo_can_reclaim_slab,
amdgpu_bo_slab_alloc_encrypted, amdgpu_bo_slab_alloc_encrypted,

View file

@ -1013,19 +1013,14 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
struct radeon_bo *bo; struct radeon_bo *bo;
int heap = -1; int heap = -1;
radeon_canonicalize_bo_flags(&domain, &flags);
assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */ assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */
/* Only 32-bit sizes are supported. */ /* Only 32-bit sizes are supported. */
if (size > UINT_MAX) if (size > UINT_MAX)
return NULL; return NULL;
/* VRAM implies WC. This is not optional. */
if (domain & RADEON_DOMAIN_VRAM)
flags |= RADEON_FLAG_GTT_WC;
/* NO_CPU_ACCESS is valid with VRAM only. */
if (domain != RADEON_DOMAIN_VRAM)
flags &= ~RADEON_FLAG_NO_CPU_ACCESS;
/* Sub-allocate small buffers from slabs. */ /* Sub-allocate small buffers from slabs. */
if (!(flags & RADEON_FLAG_NO_SUBALLOC) && if (!(flags & RADEON_FLAG_NO_SUBALLOC) &&
size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) && size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
@ -1034,7 +1029,7 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
struct pb_slab_entry *entry; struct pb_slab_entry *entry;
int heap = radeon_get_heap_index(domain, flags); int heap = radeon_get_heap_index(domain, flags);
if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS) if (heap < 0 || heap >= RADEON_NUM_HEAPS)
goto no_slab; goto no_slab;
entry = pb_slab_alloc(&ws->bo_slabs, size, heap); entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
@ -1070,7 +1065,7 @@ no_slab:
/* Shared resources don't use cached heaps. */ /* Shared resources don't use cached heaps. */
if (use_reusable_pool) { if (use_reusable_pool) {
heap = radeon_get_heap_index(domain, flags); heap = radeon_get_heap_index(domain, flags);
assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS); assert(heap >= 0 && heap < RADEON_NUM_HEAPS);
bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
0, heap)); 0, heap));

View file

@ -851,7 +851,7 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
if (!do_winsys_init(ws)) if (!do_winsys_init(ws))
goto fail1; goto fail1;
pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS, pb_cache_init(&ws->bo_cache, RADEON_NUM_HEAPS,
500000, ws->check_vm ? 1.0f : 2.0f, 0, 500000, ws->check_vm ? 1.0f : 2.0f, 0,
MIN2(ws->info.vram_size, ws->info.gart_size), NULL, MIN2(ws->info.vram_size, ws->info.gart_size), NULL,
radeon_bo_destroy, radeon_bo_destroy,
@ -864,7 +864,7 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
*/ */
if (!pb_slabs_init(&ws->bo_slabs, if (!pb_slabs_init(&ws->bo_slabs,
RADEON_SLAB_MIN_SIZE_LOG2, RADEON_SLAB_MAX_SIZE_LOG2, RADEON_SLAB_MIN_SIZE_LOG2, RADEON_SLAB_MAX_SIZE_LOG2,
RADEON_MAX_SLAB_HEAPS, false, RADEON_NUM_HEAPS, false,
ws, ws,
radeon_bo_can_reclaim_slab, radeon_bo_can_reclaim_slab,
radeon_bo_slab_alloc, radeon_bo_slab_alloc,