diff --git a/src/gallium/include/winsys/radeon_winsys.h b/src/gallium/include/winsys/radeon_winsys.h
index 9ac02e058e9..b9d76c52061 100644
--- a/src/gallium/include/winsys/radeon_winsys.h
+++ b/src/gallium/include/winsys/radeon_winsys.h
@@ -63,6 +63,7 @@ enum radeon_bo_flag
     * This guarantees that this buffer will never be moved to GTT.
     */
    RADEON_FLAG_DISCARDABLE = (1 << 10),
+   RADEON_FLAG_WINSYS_SLAB_BACKING = (1 << 11), /* only used by the winsys */
 };
 
 static inline void
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index a999d7e2882..e6db491873a 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -221,7 +221,7 @@ static void amdgpu_bo_destroy_or_cache(struct radeon_winsys *rws, struct pb_buff
    assert(is_real_bo(bo)); /* slab buffers have a separate vtbl */
 
-   if (bo->type == AMDGPU_BO_REAL_REUSABLE)
+   if (bo->type >= AMDGPU_BO_REAL_REUSABLE)
       pb_cache_add_buffer(&((struct amdgpu_bo_real_reusable*)bo)->cache_entry);
    else
       amdgpu_bo_destroy(ws, _buf);
 }
@@ -469,13 +469,20 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
    alignment = amdgpu_get_optimal_alignment(ws, size, alignment);
 
    if (heap >= 0 && flags & RADEON_FLAG_NO_INTERPROCESS_SHARING) {
-      struct amdgpu_bo_real_reusable *new_bo = CALLOC_STRUCT(amdgpu_bo_real_reusable);
+      struct amdgpu_bo_real_reusable *new_bo;
+      bool slab_backing = flags & RADEON_FLAG_WINSYS_SLAB_BACKING;
+
+      if (slab_backing)
+         new_bo = (struct amdgpu_bo_real_reusable *)CALLOC_STRUCT(amdgpu_bo_real_reusable_slab);
+      else
+         new_bo = CALLOC_STRUCT(amdgpu_bo_real_reusable);
+
       if (!new_bo)
         return NULL;
 
       bo = &new_bo->b;
       pb_cache_init_entry(&ws->bo_cache, &new_bo->cache_entry, &bo->b.base, heap);
-      bo->b.type = AMDGPU_BO_REAL_REUSABLE;
+      bo->b.type = slab_backing ? AMDGPU_BO_REAL_REUSABLE_SLAB : AMDGPU_BO_REAL_REUSABLE;
    } else {
       bo = CALLOC_STRUCT(amdgpu_bo_real);
       if (!bo)
@@ -659,14 +666,10 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, unsigned entry_s
                                      unsigned group_index)
 {
    struct amdgpu_winsys *ws = priv;
-   struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab);
    enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
    enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
    uint32_t base_id;
 
-   if (!slab)
-      return NULL;
-
    /* Determine the slab buffer size. */
    unsigned max_entry_size = 1 << (ws->bo_slabs.min_order + ws->bo_slabs.num_orders - 1);
 
@@ -695,78 +698,81 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, unsigned entry_s
     */
    slab_size = MAX2(slab_size, ws->info.pte_fragment_size);
 
-   slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(ws,
-                                                    slab_size, slab_size,
-                                                    domains, flags));
-   if (!slab->buffer)
-      goto fail;
+   flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING |
+            RADEON_FLAG_NO_SUBALLOC |
+            RADEON_FLAG_WINSYS_SLAB_BACKING;
+
+   struct amdgpu_bo_real_reusable_slab *slab_bo =
+      (struct amdgpu_bo_real_reusable_slab*)amdgpu_bo_create(ws, slab_size, slab_size,
+                                                             domains, flags);
+   if (!slab_bo)
+      return NULL;
+
+   /* The slab is not suballocated. */
+   assert(is_real_bo(&slab_bo->b.b.b));
+   assert(slab_bo->b.b.b.type == AMDGPU_BO_REAL_REUSABLE_SLAB);
 
    /* We can get a buffer from pb_cache that is slightly larger. */
-   slab_size = slab->buffer->base.size;
+   slab_size = slab_bo->b.b.b.base.size;
 
-   slab->base.num_entries = slab_size / entry_size;
-   slab->base.num_free = slab->base.num_entries;
-   slab->base.group_index = group_index;
-   slab->base.entry_size = entry_size;
-   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
-   if (!slab->entries)
-      goto fail_buffer;
+   slab_bo->slab.num_entries = slab_size / entry_size;
+   slab_bo->slab.num_free = slab_bo->slab.num_entries;
+   slab_bo->slab.group_index = group_index;
+   slab_bo->slab.entry_size = entry_size;
+   slab_bo->entries = CALLOC(slab_bo->slab.num_entries, sizeof(*slab_bo->entries));
+   if (!slab_bo->entries)
+      goto fail;
 
-   list_inithead(&slab->base.free);
+   list_inithead(&slab_bo->slab.free);
 
-   base_id = __sync_fetch_and_add(&ws->next_bo_unique_id, slab->base.num_entries);
+   base_id = __sync_fetch_and_add(&ws->next_bo_unique_id, slab_bo->slab.num_entries);
 
-   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
-      struct amdgpu_bo_slab_entry *bo = &slab->entries[i];
+   for (unsigned i = 0; i < slab_bo->slab.num_entries; ++i) {
+      struct amdgpu_bo_slab_entry *bo = &slab_bo->entries[i];
 
       bo->b.base.placement = domains;
       bo->b.base.alignment_log2 = util_logbase2(get_slab_entry_alignment(ws, entry_size));
       bo->b.base.size = entry_size;
       bo->b.type = AMDGPU_BO_SLAB_ENTRY;
-      bo->b.va = slab->buffer->va + i * entry_size;
+      bo->b.va = slab_bo->b.b.b.va + i * entry_size;
       bo->b.unique_id = base_id + i;
 
-      /* The slab is not suballocated. */
-      assert(is_real_bo(slab->buffer));
-      bo->real = get_real_bo(slab->buffer);
+      bo->real = &slab_bo->b.b;
 
-      bo->entry.slab = &slab->base;
-      list_addtail(&bo->entry.head, &slab->base.free);
+      bo->entry.slab = &slab_bo->slab;
+      list_addtail(&bo->entry.head, &slab_bo->slab.free);
    }
 
    /* Wasted alignment due to slabs with 3/4 allocations being aligned to a power of two. */
-   assert(slab->base.num_entries * entry_size <= slab_size);
+   assert(slab_bo->slab.num_entries * entry_size <= slab_size);
    if (domains & RADEON_DOMAIN_VRAM)
-      ws->slab_wasted_vram += slab_size - slab->base.num_entries * entry_size;
+      ws->slab_wasted_vram += slab_size - slab_bo->slab.num_entries * entry_size;
    else
-      ws->slab_wasted_gtt += slab_size - slab->base.num_entries * entry_size;
+      ws->slab_wasted_gtt += slab_size - slab_bo->slab.num_entries * entry_size;
 
-   return &slab->base;
+   return &slab_bo->slab;
 
-fail_buffer:
-   amdgpu_winsys_bo_reference(ws, &slab->buffer, NULL);
 fail:
-   FREE(slab);
+   amdgpu_winsys_bo_reference(ws, (struct amdgpu_winsys_bo**)&slab_bo, NULL);
    return NULL;
 }
 
-void amdgpu_bo_slab_free(struct amdgpu_winsys *ws, struct pb_slab *pslab)
+void amdgpu_bo_slab_free(struct amdgpu_winsys *ws, struct pb_slab *slab)
 {
-   struct amdgpu_slab *slab = amdgpu_slab(pslab);
-   unsigned slab_size = slab->buffer->base.size;
+   struct amdgpu_bo_real_reusable_slab *bo = get_bo_from_slab(slab);
+   unsigned slab_size = bo->b.b.b.base.size;
 
-   assert(slab->base.num_entries * slab->base.entry_size <= slab_size);
-   if (slab->buffer->base.placement & RADEON_DOMAIN_VRAM)
-      ws->slab_wasted_vram -= slab_size - slab->base.num_entries * slab->base.entry_size;
+   assert(bo->slab.num_entries * bo->slab.entry_size <= slab_size);
+   if (bo->b.b.b.base.placement & RADEON_DOMAIN_VRAM)
+      ws->slab_wasted_vram -= slab_size - bo->slab.num_entries * bo->slab.entry_size;
    else
-      ws->slab_wasted_gtt -= slab_size - slab->base.num_entries * slab->base.entry_size;
+      ws->slab_wasted_gtt -= slab_size - bo->slab.num_entries * bo->slab.entry_size;
 
-   for (unsigned i = 0; i < slab->base.num_entries; ++i)
-      amdgpu_bo_remove_fences(&slab->entries[i].b);
+   for (unsigned i = 0; i < bo->slab.num_entries; ++i)
+      amdgpu_bo_remove_fences(&bo->entries[i].b);
 
-   FREE(slab->entries);
-   amdgpu_winsys_bo_reference(ws, &slab->buffer, NULL);
-   FREE(slab);
+   FREE(bo->entries);
+   amdgpu_winsys_bo_reference(ws, (struct amdgpu_winsys_bo**)&bo, NULL);
 }
 
 #if DEBUG_SPARSE_COMMITS
@@ -1401,8 +1407,31 @@ no_slab:
       /* Get a buffer from the cache. */
       bo = (struct amdgpu_winsys_bo*)
            pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, 0, heap);
-      if (bo)
+      if (bo) {
+         /* If the buffer is amdgpu_bo_real_reusable, but we need amdgpu_bo_real_reusable_slab,
+          * keep the allocation but make the structure bigger.
+          */
+         if (flags & RADEON_FLAG_WINSYS_SLAB_BACKING && bo->type == AMDGPU_BO_REAL_REUSABLE) {
+            const unsigned orig_size = sizeof(struct amdgpu_bo_real_reusable);
+            const unsigned new_size = sizeof(struct amdgpu_bo_real_reusable_slab);
+            struct amdgpu_winsys_bo *new_bo =
+               (struct amdgpu_winsys_bo*)REALLOC(bo, orig_size, new_size);
+
+            if (!new_bo) {
+               amdgpu_winsys_bo_reference(ws, &bo, NULL);
+               return NULL;
+            }
+
+            memset((uint8_t*)new_bo + orig_size, 0, new_size - orig_size);
+            bo = new_bo;
+            bo->type = AMDGPU_BO_REAL_REUSABLE_SLAB;
+
+            /* Re-set pointers after realloc. */
+            struct amdgpu_bo_real_reusable *real_bo = get_real_bo_reusable(bo);
+            real_bo->cache_entry.buffer = &bo->base;
+         }
          return &bo->base;
+      }
    }
 
    /* Create a new one. */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 833caae1e05..1d3777b2573 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -38,8 +38,9 @@ struct amdgpu_sparse_commitment {
 enum amdgpu_bo_type {
    AMDGPU_BO_SLAB_ENTRY,
    AMDGPU_BO_SPARSE,
-   AMDGPU_BO_REAL, /* only REAL enums can be present after this */
-   AMDGPU_BO_REAL_REUSABLE,
+   AMDGPU_BO_REAL,               /* only REAL enums can be present after this */
+   AMDGPU_BO_REAL_REUSABLE,      /* only REAL_REUSABLE enums can be present after this */
+   AMDGPU_BO_REAL_REUSABLE_SLAB,
 };
 
 /* Anything above REAL will use the BO list for REAL. */
@@ -124,9 +125,12 @@ struct amdgpu_bo_slab_entry {
    struct pb_slab_entry entry;
 };
 
-struct amdgpu_slab {
-   struct pb_slab base;
-   struct amdgpu_winsys_bo *buffer;
+/* The slab buffer, which is the big backing buffer out of which smaller BOs are suballocated and
+ * represented by amdgpu_bo_slab_entry. It's always a real and reusable buffer.
+ */
+struct amdgpu_bo_real_reusable_slab {
+   struct amdgpu_bo_real_reusable b;
+   struct pb_slab slab;
    struct amdgpu_bo_slab_entry *entries;
 };
 
@@ -143,7 +147,7 @@ static struct amdgpu_bo_real *get_real_bo(struct amdgpu_winsys_bo *bo)
 
 static struct amdgpu_bo_real_reusable *get_real_bo_reusable(struct amdgpu_winsys_bo *bo)
 {
-   assert(bo->type == AMDGPU_BO_REAL_REUSABLE);
+   assert(bo->type >= AMDGPU_BO_REAL_REUSABLE);
    return (struct amdgpu_bo_real_reusable*)bo;
 }
 
@@ -159,6 +163,11 @@ static struct amdgpu_bo_slab_entry *get_slab_entry_bo(struct amdgpu_winsys_bo *b
    return (struct amdgpu_bo_slab_entry*)bo;
 }
 
+static inline struct amdgpu_bo_real_reusable_slab *get_bo_from_slab(struct pb_slab *slab)
+{
+   return container_of(slab, struct amdgpu_bo_real_reusable_slab, slab);
+}
+
 bool amdgpu_bo_can_reclaim(struct amdgpu_winsys *ws, struct pb_buffer *_buf);
 struct pb_buffer *amdgpu_bo_create(struct amdgpu_winsys *ws,
                                    uint64_t size,
@@ -184,12 +193,6 @@ struct amdgpu_winsys_bo *amdgpu_winsys_bo(struct pb_buffer *bo)
    return (struct amdgpu_winsys_bo *)bo;
 }
 
-static inline
-struct amdgpu_slab *amdgpu_slab(struct pb_slab *slab)
-{
-   return (struct amdgpu_slab *)slab;
-}
-
 static inline void
 amdgpu_winsys_bo_reference(struct amdgpu_winsys *ws,
                            struct amdgpu_winsys_bo **dst,
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index 81ab171deaf..df222d6feca 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -224,7 +224,7 @@ amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
 
 static inline unsigned get_buf_list_idx(struct amdgpu_winsys_bo *bo)
 {
-   /* AMDGPU_BO_REAL_REUSABLE maps to AMDGPU_BO_REAL. */
+   /* AMDGPU_BO_REAL_REUSABLE* maps to AMDGPU_BO_REAL. */
    static_assert(ARRAY_SIZE(((struct amdgpu_cs_context*)NULL)->buffer_lists) == NUM_BO_LIST_TYPES, "");
    return MIN2(bo->type, AMDGPU_BO_REAL);
 }
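
Two idioms in this patch benefit from a standalone illustration. First, the pb_slab
bookkeeping now lives inside the backing buffer (struct amdgpu_bo_real_reusable_slab)
instead of in a separate amdgpu_slab wrapper that pointed at it, and get_bo_from_slab()
recovers the owning buffer from the embedded member with container_of(). A minimal
sketch of that recovery, using hypothetical stand-in types rather than the real Mesa
structs:

   #include <assert.h>
   #include <stddef.h>
   #include <stdlib.h>

   /* Hypothetical stand-ins for pb_slab and the real-BO base struct. */
   struct slab_hdr { unsigned num_entries; };
   struct real_bo  { unsigned size; };

   /* Slab header embedded in the buffer, as in amdgpu_bo_real_reusable_slab. */
   struct backing_bo {
      struct real_bo  b;    /* base comes first, like 'b' in the patch */
      struct slab_hdr slab; /* embedded, not pointed-to */
   };

   /* Same trick as Mesa's container_of(): subtract the member offset. */
   #define container_of(ptr, type, member) \
      ((type *)((char *)(ptr) - offsetof(type, member)))

   /* Rough equivalent of get_bo_from_slab(). */
   static struct backing_bo *bo_from_slab(struct slab_hdr *slab)
   {
      return container_of(slab, struct backing_bo, slab);
   }

   int main(void)
   {
      struct backing_bo *bo = calloc(1, sizeof(*bo));
      struct slab_hdr *slab = &bo->slab; /* what the slab allocator sees */

      assert(bo_from_slab(slab) == bo);
      free(bo); /* one allocation frees buffer and slab header together */
      return 0;
   }

Because buffer and slab header are now one allocation, amdgpu_bo_slab_free() can drop
the old FREE(slab): releasing the buffer reference frees the slab header with it, and
only the entries array remains a separate allocation.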
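Second, the pb_cache reclaim path promotes a cached amdgpu_bo_real_reusable in place to
the larger amdgpu_bo_real_reusable_slab with REALLOC, zeroing only the appended tail and
re-pointing any pointers into the old block. A rough sketch under the same caveats
(hypothetical types; plain realloc() stands in for Mesa's REALLOC(), which also takes
the old size):

   #include <stdlib.h>
   #include <string.h>

   struct small  { int type; void *self; };          /* like amdgpu_bo_real_reusable */
   struct bigger { struct small b; int extra[8]; };  /* like ..._reusable_slab */

   /* Grow 'small' into 'bigger' in place, keeping the existing contents and
    * zeroing only the new tail, as the patch does with
    * memset((uint8_t*)new_bo + orig_size, 0, new_size - orig_size).
    */
   static struct bigger *promote(struct small *s)
   {
      const size_t orig_size = sizeof(struct small);
      const size_t new_size = sizeof(struct bigger);
      struct bigger *n = realloc(s, new_size);

      if (!n)
         return NULL; /* 's' is still valid; the patch frees it via bo_reference */

      /* Zero only the appended bytes; the original fields survive the move. */
      memset((char *)n + orig_size, 0, new_size - orig_size);

      /* Self-referencing pointers must be re-pointed, since realloc may have
       * moved the block (cache_entry.buffer in the patch). */
      n->b.self = n;
      return n;
   }

   int main(void)
   {
      struct small *s = calloc(1, sizeof(*s));
      if (!s)
         return 1;
      s->self = s;

      struct bigger *b = promote(s);
      if (!b)
         return 1;

      free(b);
      return 0;
   }

The re-pointing step mirrors the patch's real_bo->cache_entry.buffer = &bo->base;
omitting it would leave the cache entry pointing into freed memory whenever the
reallocation moves the block.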