/* mesa/src/util/ralloc.c
 * (web blame-viewer chrome removed — it was scraping residue, not source.)
 */
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "util/list.h"
#include "util/macros.h"
#include "util/u_math.h"
#include "util/u_printf.h"
#include "ralloc.h"
#define CANARY 0x5A1106
#if defined(__LP64__) || defined(_WIN64)
#define HEADER_ALIGN 16
#else
#define HEADER_ALIGN 8
#endif
/* Align the header's size so that ralloc() allocations will return with the
* same alignment as a libc malloc would have (8 on 32-bit GLIBC, 16 on
* 64-bit), avoiding performance penalities on x86 and alignment faults on
* ARM.
*/
/* Metadata header stored immediately before every ralloc allocation.
 * Headers form an intrusive tree: freeing a header frees its whole subtree.
 */
struct ralloc_header
{
   alignas(HEADER_ALIGN)

#ifndef NDEBUG
   /* A canary value used to determine whether a pointer is ralloc'd. */
   unsigned canary;
   /* User-requested size of the allocation; debug builds only, used by
    * ralloc_total_size(). */
   unsigned size;
#endif

   struct ralloc_header *parent;

   /* The first child (head of a linked list) */
   struct ralloc_header *child;

   /* Linked list of siblings */
   struct ralloc_header *prev;
   struct ralloc_header *next;

   /* Optional callback invoked on the user pointer by unsafe_free(). */
   void (*destructor)(void *);
};

typedef struct ralloc_header ralloc_header;
static void unlink_block(ralloc_header *info);
static void unsafe_free(ralloc_header *info);
/* Recover the ralloc_header that precedes a user pointer.  In debug builds
 * the canary assert catches pointers that were not allocated with ralloc.
 */
static ralloc_header *
get_header(const void *ptr)
{
   ralloc_header *info = (ralloc_header *) (((char *) ptr) -
                                            sizeof(ralloc_header));
   assert(info->canary == CANARY);
   return info;
}
#define PTR_FROM_HEADER(info) (((char *) info) + sizeof(ralloc_header))
/* Link "info" in as the newest child of "parent" (no-op for a NULL parent). */
static void
add_child(ralloc_header *parent, ralloc_header *info)
{
   if (parent == NULL)
      return;

   info->parent = parent;
   info->next = parent->child;
   parent->child = info;

   if (info->next != NULL)
      info->next->prev = info;
}
/* Create an empty context: a zero-byte allocation useful only as a parent
 * for other allocations.
 */
void *
ralloc_context(const void *ctx)
{
   return ralloc_size(ctx, 0);
}
/* Allocate "size" bytes parented to "ctx" (unparented when ctx is NULL).
 * The user pointer sits right after a ralloc_header whose size is padded so
 * the data gets the same alignment a libc malloc would provide.
 */
void *
ralloc_size(const void *ctx, size_t size)
{
   /* Some malloc allocation doesn't always align to 16 bytes even on 64 bits
    * system, from Android bionic/tests/malloc_test.cpp:
    *  - Allocations of a size that rounds up to a multiple of 16 bytes
    *    must have at least 16 byte alignment.
    *  - Allocations of a size that rounds up to a multiple of 8 bytes and
    *    not 16 bytes, are only required to have at least 8 byte alignment.
    */
   void *block = malloc(align64(size + sizeof(ralloc_header),
                                alignof(ralloc_header)));
   ralloc_header *info;
   ralloc_header *parent;

   if (unlikely(block == NULL))
      return NULL;

   info = (ralloc_header *) block;
   /* measurements have shown that calloc is slower (because of
    * the multiplication overflow checking?), so clear things
    * manually
    */
   info->parent = NULL;
   info->child = NULL;
   info->prev = NULL;
   info->next = NULL;
   info->destructor = NULL;

   parent = ctx != NULL ? get_header(ctx) : NULL;
   add_child(parent, info);

#ifndef NDEBUG
   info->canary = CANARY;
   info->size = size;
#endif

   return PTR_FROM_HEADER(info);
}
/* Allocate "size" zero-initialized bytes parented to "ctx". */
void *
rzalloc_size(const void *ctx, size_t size)
{
   void *mem = ralloc_size(ctx, size);

   if (unlikely(mem == NULL))
      return NULL;

   return memset(mem, 0, size);
}
/* helper function - assumes ptr != NULL.
 *
 * Grow/shrink the allocation behind "ptr" with realloc().  If realloc moves
 * the header, every tree link that referenced the old address (the parent's
 * child pointer, both siblings, and all children's parent pointers) must be
 * patched to the new one.
 */
static void *
resize(void *ptr, size_t size)
{
   ralloc_header *child, *old, *info;

   old = get_header(ptr);
   info = realloc(old, align64(size + sizeof(ralloc_header),
                               alignof(ralloc_header)));

   /* On failure the old block (and all links) remain valid. */
   if (info == NULL)
      return NULL;

   /* Update parent and sibling's links to the reallocated node. */
   if (info != old && info->parent != NULL) {
      if (info->parent->child == old)
         info->parent->child = info;
      if (info->prev != NULL)
         info->prev->next = info;
      if (info->next != NULL)
         info->next->prev = info;
   }

   /* Update child->parent links for all children */
   for (child = info->child; child != NULL; child = child->next)
      child->parent = info;

   return PTR_FROM_HEADER(info);
}
/* Resize "ptr" under context "ctx"; a NULL ptr behaves like ralloc_size().
 * Returns NULL on failure, leaving the original block intact.
 */
void *
reralloc_size(const void *ctx, void *ptr, size_t size)
{
   if (unlikely(ptr == NULL))
      return ralloc_size(ctx, size);

   assert(ralloc_parent(ptr) == ctx);
   return resize(ptr, size);
}
/* Resize "ptr" to "new_size" bytes, zero-filling any bytes past "old_size".
 * A NULL ptr behaves like rzalloc_size().  Returns NULL on failure, leaving
 * the original block intact.
 */
void *
rerzalloc_size(const void *ctx, void *ptr, size_t old_size, size_t new_size)
{
   if (unlikely(ptr == NULL))
      return rzalloc_size(ctx, new_size);

   assert(ralloc_parent(ptr) == ctx);
   ptr = resize(ptr, new_size);

   /* Fix: resize() can fail — the previous code passed the resulting NULL
    * (offset by old_size) straight to memset. */
   if (likely(ptr != NULL) && new_size > old_size)
      memset((char *)ptr + old_size, 0, new_size - old_size);

   return ptr;
}
/* Allocate space for "count" elements of "size" bytes each, with overflow
 * checking on the multiplication.  Returns NULL on overflow or OOM.
 */
void *
ralloc_array_size(const void *ctx, size_t size, unsigned count)
{
   /* Fix: SIZE_MAX/size is undefined behavior for size == 0 (and a zero
    * element size cannot overflow anyway), so guard the check itself. */
   if (size != 0 && count > SIZE_MAX/size)
      return NULL;

   return ralloc_size(ctx, size * count);
}
/* Zeroed variant of ralloc_array_size(); same overflow checking. */
void *
rzalloc_array_size(const void *ctx, size_t size, unsigned count)
{
   /* Fix: SIZE_MAX/size is undefined behavior for size == 0 (and a zero
    * element size cannot overflow anyway), so guard the check itself. */
   if (size != 0 && count > SIZE_MAX/size)
      return NULL;

   return rzalloc_size(ctx, size * count);
}
/* Resize an array allocation to "count" elements of "size" bytes, with
 * overflow checking on the multiplication.
 */
void *
reralloc_array_size(const void *ctx, void *ptr, size_t size, unsigned count)
{
   /* Fix: SIZE_MAX/size is undefined behavior for size == 0 (and a zero
    * element size cannot overflow anyway), so guard the check itself. */
   if (size != 0 && count > SIZE_MAX/size)
      return NULL;

   return reralloc_size(ctx, ptr, size * count);
}
/* Resize an array from "old_count" to "new_count" elements, zero-filling
 * the newly added elements.  Overflow-checks the new size; old_count is
 * assumed to have been validated when the array was first allocated.
 */
void *
rerzalloc_array_size(const void *ctx, void *ptr, size_t size,
                     unsigned old_count, unsigned new_count)
{
   /* Fix: SIZE_MAX/size is undefined behavior for size == 0 (and a zero
    * element size cannot overflow anyway), so guard the check itself. */
   if (size != 0 && new_count > SIZE_MAX/size)
      return NULL;

   return rerzalloc_size(ctx, ptr, size * old_count, size * new_count);
}
/* Free "ptr" and everything allocated under it; NULL is a no-op. */
void
ralloc_free(void *ptr)
{
   if (ptr == NULL)
      return;

   ralloc_header *info = get_header(ptr);
   unlink_block(info);
   unsafe_free(info);
}
#ifndef NDEBUG
static size_t
ralloc_total_size_internal(const ralloc_header *info)
{
/* Count the block itself. This requires NDEBUG for the statistic. */
unsigned sum = align64(info->size + sizeof(ralloc_header),
alignof(ralloc_header));
/* Recursively count children */
ralloc_header *it = info->child;
while (it != NULL) {
sum += ralloc_total_size_internal(it);
it = it->next;
}
return sum;
}
/* Total bytes consumed by "ptr" and its whole subtree, including headers
 * and alignment padding.  Debug builds only (see #ifndef NDEBUG above).
 */
size_t
ralloc_total_size(const void *ptr)
{
   return ralloc_total_size_internal(get_header(ptr));
}
#endif
/* Detach "info" from its parent's child list and from both siblings,
 * clearing its own links so it can be re-parented or freed safely.
 */
static void
unlink_block(ralloc_header *info)
{
   ralloc_header *parent = info->parent;

   if (parent != NULL) {
      if (parent->child == info)
         parent->child = info->next;
      if (info->prev != NULL)
         info->prev->next = info->next;
      if (info->next != NULL)
         info->next->prev = info->prev;
   }

   info->parent = NULL;
   info->prev = NULL;
   info->next = NULL;
}
/* Free "info" and its entire subtree without unlinking it from its parent;
 * the caller must have unlinked it already (or be freeing the parent too).
 * Note that children are freed before this block's own destructor runs.
 */
static void
unsafe_free(ralloc_header *info)
{
   /* Recursively free any children...don't waste time unlinking them. */
   ralloc_header *temp;
   while (info->child != NULL) {
      temp = info->child;
      info->child = temp->next;
      unsafe_free(temp);
   }

   /* Free the block itself.  Call the destructor first, if any. */
   if (info->destructor != NULL)
      info->destructor(PTR_FROM_HEADER(info));

   free(info);
}
/* Re-parent "ptr" under "new_ctx" (or make it a root when new_ctx is NULL).
 * NULL ptr is a no-op.
 */
void
ralloc_steal(const void *new_ctx, void *ptr)
{
   if (unlikely(ptr == NULL))
      return;

   ralloc_header *info = get_header(ptr);
   ralloc_header *parent = (new_ctx != NULL) ? get_header(new_ctx) : NULL;

   unlink_block(info);
   add_child(parent, info);
}
/* Move every direct child of "old_ctx" under "new_ctx", leaving old_ctx
 * childless.  The whole child list is spliced in front of new_ctx's
 * existing children in one operation.
 */
void
ralloc_adopt(const void *new_ctx, void *old_ctx)
{
   ralloc_header *new_info, *old_info, *child;

   if (unlikely(old_ctx == NULL))
      return;

   old_info = get_header(old_ctx);
   new_info = get_header(new_ctx);

   /* If there are no children, bail. */
   if (unlikely(old_info->child == NULL))
      return;

   /* Set all the children's parent to new_ctx; get a pointer to the last child. */
   for (child = old_info->child; child->next != NULL; child = child->next) {
      child->parent = new_info;
   }
   /* The loop exits before visiting the final child, so fix it up here. */
   child->parent = new_info;

   /* Connect the two lists together; parent them to new_ctx; make old_ctx empty. */
   child->next = new_info->child;
   if (child->next)
      child->next->prev = child;
   new_info->child = old_info->child;
   old_info->child = NULL;
}
/* Return the user pointer of "ptr"'s parent context, or NULL when ptr is
 * NULL or is a root allocation.
 */
void *
ralloc_parent(const void *ptr)
{
   if (unlikely(ptr == NULL))
      return NULL;

   ralloc_header *info = get_header(ptr);

   if (info->parent == NULL)
      return NULL;

   return PTR_FROM_HEADER(info->parent);
}
/* Register "destructor" to run on "ptr" when it is freed (after its
 * children have already been freed — see unsafe_free()).
 */
void
ralloc_set_destructor(const void *ptr, void(*destructor)(void *))
{
   ralloc_header *info = get_header(ptr);
   info->destructor = destructor;
}
/* Duplicate "n" bytes starting at "mem" into a new allocation under "ctx".
 * Returns NULL on allocation failure.
 */
void *
ralloc_memdup(const void *ctx, const void *mem, size_t n)
{
   void *copy = ralloc_size(ctx, n);

   if (unlikely(copy == NULL))
      return NULL;

   return memcpy(copy, mem, n);
}
/* Duplicate the NUL-terminated "str" under "ctx".  Returns NULL for a NULL
 * input or on allocation failure.
 */
char *
ralloc_strdup(const void *ctx, const char *str)
{
   size_t n;
   char *ptr;

   if (unlikely(str == NULL))
      return NULL;

   n = strlen(str);
   ptr = ralloc_array(ctx, char, n + 1);
   /* Fix: check for allocation failure before writing — the previous code
    * would memcpy through NULL on OOM (linear_strdup already checks). */
   if (unlikely(ptr == NULL))
      return NULL;

   memcpy(ptr, str, n);
   ptr[n] = '\0';
   return ptr;
}
/* Duplicate at most "max" bytes of "str" under "ctx" (always
 * NUL-terminates).  Returns NULL for a NULL input or on allocation failure.
 */
char *
ralloc_strndup(const void *ctx, const char *str, size_t max)
{
   size_t n;
   char *ptr;

   if (unlikely(str == NULL))
      return NULL;

   n = strnlen(str, max);
   ptr = ralloc_array(ctx, char, n + 1);
   /* Fix: check for allocation failure before writing — the previous code
    * would memcpy through NULL on OOM (linear_strdup already checks). */
   if (unlikely(ptr == NULL))
      return NULL;

   memcpy(ptr, str, n);
   ptr[n] = '\0';
   return ptr;
}
/* helper routine for strcat/strncat - n is the exact amount to copy */
static bool
cat(char **dest, const char *str, size_t n)
{
   assert(dest != NULL && *dest != NULL);

   size_t old_len = strlen(*dest);
   char *combined = resize(*dest, old_len + n + 1);

   if (unlikely(combined == NULL))
      return false;

   memcpy(combined + old_len, str, n);
   combined[old_len + n] = '\0';
   *dest = combined;

   return true;
}
/* Append the NUL-terminated "str" to *dest, updating *dest in place.
 * Returns false (leaving *dest untouched) on allocation failure.
 */
bool
ralloc_strcat(char **dest, const char *str)
{
   return cat(dest, str, strlen(str));
}
/* Append at most "n" bytes of "str" to *dest (result always
 * NUL-terminated).  Returns false on allocation failure.
 */
bool
ralloc_strncat(char **dest, const char *str, size_t n)
{
   return cat(dest, str, strnlen(str, n));
}
/* Append "str_size" bytes of "str" after the first "existing_length" bytes
 * of *dest, NUL-terminating the result.  The caller supplies both lengths,
 * so neither string needs to be NUL-terminated on entry.
 */
bool
ralloc_str_append(char **dest, const char *str,
                  size_t existing_length, size_t str_size)
{
   assert(dest != NULL && *dest != NULL);

   char *grown = resize(*dest, existing_length + str_size + 1);
   if (unlikely(grown == NULL))
      return false;

   memcpy(grown + existing_length, str, str_size);
   grown[existing_length + str_size] = '\0';
   *dest = grown;

   return true;
}
/* printf-style allocation: return a freshly allocated formatted string
 * parented to "ctx", or NULL on allocation failure.
 */
char *
ralloc_asprintf(const void *ctx, const char *fmt, ...)
{
   char *ptr;
   va_list args;
   va_start(args, fmt);
   ptr = ralloc_vasprintf(ctx, fmt, args);
   va_end(args);
   return ptr;
}
/* va_list variant of ralloc_asprintf(); returns NULL on allocation failure. */
char *
ralloc_vasprintf(const void *ctx, const char *fmt, va_list args)
{
   const size_t size = u_printf_length(fmt, args) + 1;

   char *buf = ralloc_size(ctx, size);
   if (buf != NULL)
      vsnprintf(buf, size, fmt, args);

   return buf;
}
/* Append formatted output to *str (which may be NULL).  Returns false on
 * allocation failure, leaving *str untouched.
 */
bool
ralloc_asprintf_append(char **str, const char *fmt, ...)
{
   bool success;
   va_list args;
   va_start(args, fmt);
   success = ralloc_vasprintf_append(str, fmt, args);
   va_end(args);
   return success;
}
/* va_list variant of ralloc_asprintf_append(): appends at the current end
 * of *str (treating NULL as an empty string).
 */
bool
ralloc_vasprintf_append(char **str, const char *fmt, va_list args)
{
   assert(str != NULL);

   size_t len = (*str != NULL) ? strlen(*str) : 0;
   return ralloc_vasprintf_rewrite_tail(str, &len, fmt, args);
}
/* Overwrite *str from offset *start with formatted output, growing the
 * allocation as needed; *start is advanced to the new end of the string.
 */
bool
ralloc_asprintf_rewrite_tail(char **str, size_t *start, const char *fmt, ...)
{
   bool success;
   va_list args;
   va_start(args, fmt);
   success = ralloc_vasprintf_rewrite_tail(str, start, fmt, args);
   va_end(args);
   return success;
}
/* va_list variant of ralloc_asprintf_rewrite_tail().  When *str is NULL a
 * fresh unparented string is allocated.  Returns false on allocation
 * failure, leaving *str and *start untouched.
 */
bool
ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt,
                              va_list args)
{
   size_t new_length;
   char *ptr;

   assert(str != NULL);

   if (unlikely(*str == NULL)) {
      /* Assuming a NULL context is probably bad, but it's expected behavior. */
      *str = ralloc_vasprintf(NULL, fmt, args);
      /* Fix: report allocation failure instead of calling strlen(NULL). */
      if (unlikely(*str == NULL))
         return false;
      *start = strlen(*str);
      return true;
   }

   new_length = u_printf_length(fmt, args);
   ptr = resize(*str, *start + new_length + 1);
   if (unlikely(ptr == NULL))
      return false;

   vsnprintf(ptr + *start, new_length + 1, fmt, args);
   *str = ptr;
   *start += new_length;
   return true;
}
/***************************************************************************
* GC context.
***************************************************************************
*/
/* The maximum size of an object that will be allocated specially.
*/
#define MAX_FREELIST_SIZE 512
/* Allocations small enough to be allocated from a freelist will be aligned up
* to this size.
*/
#define FREELIST_ALIGNMENT 32
#define NUM_FREELIST_BUCKETS (MAX_FREELIST_SIZE / FREELIST_ALIGNMENT)
/* The size of a slab. */
#define SLAB_SIZE (32 * 1024)
#define GC_CONTEXT_CANARY 0xAF6B6C83
#define GC_CANARY 0xAF6B5B72
/* Per-object state bits stored in gc_block_header::flags. */
enum gc_flags {
   /* The object is currently allocated (cleared when on a freelist). */
   IS_USED = (1 << 0),
   /* Generation bit; objects whose bit doesn't match ctx->current_gen at
    * gc_sweep_end() are freed. */
   CURRENT_GENERATION = (1 << 1),
   /* Set in the byte just before the user pointer when alignment padding
    * was inserted; the low bits of that byte hold the padding amount. */
   IS_PADDING = (1 << 7),
};
/* Header preceding every gc_alloc allocation. */
typedef struct
{
#ifndef NDEBUG
   /* A canary value used to determine whether a pointer is allocated using gc_alloc. */
   unsigned canary;
#endif

   /* Byte distance from the owning gc_slab back to this header (only
    * meaningful for slab-backed objects). */
   uint16_t slab_offset;
   /* Freelist bucket index, or NUM_FREELIST_BUCKETS when the block was
    * allocated directly with ralloc (see gc_alloc_size/gc_free). */
   uint8_t bucket;
   /* gc_flags bits: IS_USED and CURRENT_GENERATION. */
   uint8_t flags;

   /* The last padding byte must have IS_PADDING set and is used to store the amount of padding. If
    * there is no padding, the IS_PADDING bit of "flags" is unset and "flags" is checked instead.
    * Because of this, "flags" must be the last member of this struct.
    */
   uint8_t padding[];
} gc_block_header;
/* This structure is at the start of the slab. Objects inside a slab are
* allocated using a freelist backed by a simple linear allocator.
*/
typedef struct gc_slab {
alignas(HEADER_ALIGN)
gc_ctx *ctx;
/* Objects are allocated using either linear or freelist allocation. "next_available" is the
* pointer used for linear allocation, while "freelist" is the next free object for freelist
* allocation.
*/
char *next_available;
gc_block_header *freelist;
/* Slabs that handle the same-sized objects. */
struct list_head link;
/* Free slabs that handle the same-sized objects. */
struct list_head free_link;
/* Number of allocated and free objects, recorded so that we can free the slab if it
* becomes empty or add one to the freelist if it's no longer full.
*/
unsigned num_allocated;
unsigned num_free;
} gc_slab;
/* A garbage-collected allocation context: per-size slab buckets plus the
 * state driving the mark-and-sweep generation machinery.
 */
struct gc_ctx {
#ifndef NDEBUG
   /* Set to GC_CONTEXT_CANARY to identify a live gc_ctx. */
   unsigned canary;
#endif

   /* Array of slabs for fixed-size allocations. Each slab tracks allocations
    * of specific sized blocks. User allocations are rounded up to the nearest
    * fixed size. slabs[N] contains allocations of size
    * FREELIST_ALIGNMENT * (N + 1).
    */
   struct {
      /* List of slabs in this bucket. */
      struct list_head slabs;

      /* List of slabs with free space in this bucket, so we can quickly choose one when
       * allocating.
       */
      struct list_head free_slabs;
   } slabs[NUM_FREELIST_BUCKETS];

   /* Value of the CURRENT_GENERATION bit for live objects; flipped by
    * gc_sweep_start(). */
   uint8_t current_gen;
   /* Temporary ralloc context holding all allocations between
    * gc_sweep_start() and gc_sweep_end(); unmarked blocks die with it. */
   void *rubbish;
};
/* Walk back from a user pointer to its gc_block_header, undoing any
 * alignment padding recorded in the byte just before the pointer.
 */
static gc_block_header *
get_gc_header(const void *ptr)
{
   uint8_t *c_ptr = (uint8_t *)ptr;

   /* Adjust for padding added to ensure alignment of the allocation. There might also be padding
    * added by the compiler into gc_block_header, but that isn't counted in the IS_PADDING byte.
    */
   if (c_ptr[-1] & IS_PADDING)
      c_ptr -= c_ptr[-1] & ~IS_PADDING;

   c_ptr -= sizeof(gc_block_header);

   gc_block_header *info = (gc_block_header *)c_ptr;
   assert(info->canary == GC_CANARY);
   return info;
}
/* Read the next-pointer that free objects store in their (otherwise unused)
 * payload bytes, just past the header.
 */
static gc_block_header *
get_gc_freelist_next(gc_block_header *ptr)
{
   gc_block_header *next;
   /* work around possible strict aliasing bug using memcpy */
   memcpy(&next, (void*)(ptr + 1), sizeof(next));
   return next;
}
/* Store the freelist next-pointer in a free object's payload bytes
 * (memcpy for the same aliasing reason as get_gc_freelist_next()).
 */
static void
set_gc_freelist_next(gc_block_header *ptr, gc_block_header *next)
{
   memcpy((void*)(ptr + 1), &next, sizeof(next));
}
/* Recover the owning slab from a header via its recorded slab_offset. */
static gc_slab *
get_gc_slab(gc_block_header *header)
{
   return (gc_slab *)((char *)header - header->slab_offset);
}
gc_ctx *
gc_context(const void *parent)
{
gc_ctx *ctx = rzalloc(parent, gc_ctx);
for (unsigned i = 0; i < NUM_FREELIST_BUCKETS; i++) {
list_inithead(&ctx->slabs[i].slabs);
list_inithead(&ctx->slabs[i].free_slabs);
}
#ifndef NDEBUG
ctx->canary = GC_CONTEXT_CANARY;
#endif
return ctx;
}
static_assert(UINT32_MAX >= MAX_FREELIST_SIZE, "Freelist sizes use uint32_t");
/* Object size (in bytes) served by freelist bucket "bucket". */
static uint32_t
gc_bucket_obj_size(uint32_t bucket)
{
   return FREELIST_ALIGNMENT * (bucket + 1);
}
/* Index of the smallest bucket whose object size fits "size" bytes. */
static uint32_t
gc_bucket_for_size(uint32_t size)
{
   const uint32_t bucket = (size - 1) / FREELIST_ALIGNMENT;
   return bucket;
}
static_assert(UINT32_MAX >= SLAB_SIZE, "SLAB_SIZE use uint32_t");
/* Number of objects of bucket "bucket" that fit in a slab after its header. */
static uint32_t
gc_bucket_num_objs(uint32_t bucket)
{
   const uint32_t usable = SLAB_SIZE - sizeof(gc_slab);
   return usable / gc_bucket_obj_size(bucket);
}
/* Take one object of "bucket"'s size out of "slab", preferring the freelist
 * over fresh linear space.  Returns NULL when the slab is exhausted.
 */
static gc_block_header *
alloc_from_slab(gc_slab *slab, uint32_t bucket)
{
   uint32_t size = gc_bucket_obj_size(bucket);
   gc_block_header *header;
   if (slab->freelist) {
      /* Prioritize already-allocated chunks, since they probably have a page
       * backing them.
       */
      header = slab->freelist;
      slab->freelist = get_gc_freelist_next(slab->freelist);
   } else if (slab->next_available + size <= ((char *) slab) + SLAB_SIZE) {
      /* Carve a fresh object off the linear tail; freelist entries keep the
       * slab_offset/bucket written at their first allocation. */
      header = (gc_block_header *) slab->next_available;
      header->slab_offset = (char *) header - (char *) slab;
      header->bucket = bucket;
      slab->next_available += size;
   } else {
      return NULL;
   }

   slab->num_allocated++;
   slab->num_free--;

   /* A now-full slab leaves the bucket's free-slab list. */
   if (!slab->num_free)
      list_del(&slab->free_link);

   return header;
}
/* Unlink "slab" from its bucket's lists (it is only on free_slabs while it
 * still has free objects) and release its memory.
 */
static void
free_slab(gc_slab *slab)
{
   if (list_is_linked(&slab->free_link))
      list_del(&slab->free_link);
   list_del(&slab->link);
   ralloc_free(slab);
}
/* Return "header" to its slab's freelist.  Frees the whole slab when this
 * was its last allocated object — unless keep_empty_slabs asks to retain the
 * bucket's only free slab.  Otherwise keeps the bucket's free-slab list
 * sorted by ascending num_free (note: num_free is incremented only after
 * the reordering below).
 */
static void
free_from_slab(gc_block_header *header, bool keep_empty_slabs)
{
   gc_slab *slab = get_gc_slab(header);

   if (slab->num_allocated == 1 && !(keep_empty_slabs && list_is_singular(&slab->free_link))) {
      /* Free the slab if this is the last object. */
      free_slab(slab);
      return;
   } else if (slab->num_free == 0) {
      /* The slab was full; it re-enters the bucket's free-slab list. */
      list_add(&slab->free_link, &slab->ctx->slabs[header->bucket].free_slabs);
   } else {
      /* Keep the free list sorted by the number of free objects in ascending order. By prefering to
       * allocate from the slab with the fewest free objects, we help free the slabs with many free
       * objects.
       */
      while (slab->free_link.next != &slab->ctx->slabs[header->bucket].free_slabs &&
             slab->num_free > list_entry(slab->free_link.next, gc_slab, free_link)->num_free) {
         gc_slab *next = list_entry(slab->free_link.next, gc_slab, free_link);

         /* Move "slab" to after "next". */
         list_move_to(&slab->free_link, &next->free_link);
      }
   }

   set_gc_freelist_next(header, slab->freelist);
   slab->freelist = header;

   slab->num_allocated--;
   slab->num_free++;
}
/* Allocation size for one slab of bucket "bucket". */
static uint32_t
get_slab_size(uint32_t bucket)
{
   /* SLAB_SIZE rounded down to a multiple of the object size so that it's not larger than what can
    * be used.
    */
   uint32_t obj_size = gc_bucket_obj_size(bucket);
   uint32_t num_objs = gc_bucket_num_objs(bucket);

   return align((uint32_t)sizeof(gc_slab) + num_objs * obj_size, alignof(gc_slab));
}
/* Allocate a fresh, completely empty slab for "bucket" and register it on
 * both of the bucket's lists.  Returns NULL on allocation failure.
 */
static gc_slab *
create_slab(gc_ctx *ctx, unsigned bucket)
{
   gc_slab *slab = ralloc_size(ctx, get_slab_size(bucket));
   if (unlikely(slab == NULL))
      return NULL;

   slab->ctx = ctx;
   slab->freelist = NULL;
   slab->next_available = (char *)(slab + 1);
   slab->num_allocated = 0;
   slab->num_free = gc_bucket_num_objs(bucket);

   list_addtail(&slab->link, &ctx->slabs[bucket].slabs);
   list_addtail(&slab->free_link, &ctx->slabs[bucket].free_slabs);

   return slab;
}
/* Allocate "size" bytes with the given power-of-two "alignment" from "ctx".
 * Small requests come from per-size slab freelists; anything whose padded
 * size exceeds MAX_FREELIST_SIZE is allocated directly with ralloc and
 * tagged bucket == NUM_FREELIST_BUCKETS so gc_free() releases it correctly.
 */
void *
gc_alloc_size(gc_ctx *ctx, size_t size, size_t alignment)
{
   assert(ctx);
   assert(util_is_power_of_two_nonzero_uintptr(alignment));

   alignment = MAX2(alignment, alignof(gc_block_header));

   /* Alignment will add at most align-alignof(gc_block_header) bytes of padding to the header, and
    * the IS_PADDING byte can only encode up to 127.
    */
   assert((alignment - alignof(gc_block_header)) <= 127);

   /* We can only align as high as the slab is. */
   assert(alignment <= HEADER_ALIGN);

   size_t header_size = align64(sizeof(gc_block_header), alignment);
   size = align64(size, alignment);
   size += header_size;

   gc_block_header *header = NULL;
   if (size <= MAX_FREELIST_SIZE) {
      uint32_t bucket = gc_bucket_for_size((uint32_t)size);
      if (list_is_empty(&ctx->slabs[bucket].free_slabs) && !create_slab(ctx, bucket))
         return NULL;
      gc_slab *slab = list_first_entry(&ctx->slabs[bucket].free_slabs, gc_slab, free_link);
      header = alloc_from_slab(slab, bucket);
   } else {
      header = ralloc_size(ctx, size);
      if (unlikely(!header))
         return NULL;
      /* Mark the header as allocated directly, so we know to actually free it. */
      header->bucket = NUM_FREELIST_BUCKETS;
   }

   header->flags = ctx->current_gen | IS_USED;
#ifndef NDEBUG
   header->canary = GC_CANARY;
#endif

   uint8_t *ptr = (uint8_t *)header + header_size;
   /* Record the padding amount in the byte just before the user pointer so
    * get_gc_header() can walk back to the header. */
   if ((header_size - 1) != offsetof(gc_block_header, flags))
      ptr[-1] = IS_PADDING | (header_size - sizeof(gc_block_header));

   assert(((uintptr_t)ptr & (alignment - 1)) == 0);
   return ptr;
}
/* Zero-initialized variant of gc_alloc_size(). */
void *
gc_zalloc_size(gc_ctx *ctx, size_t size, size_t alignment)
{
   void *mem = gc_alloc_size(ctx, size, alignment);

   if (unlikely(mem == NULL))
      return NULL;

   return memset(mem, 0, size);
}
void
gc_free(void *ptr)
{
if (!ptr)
return;
gc_block_header *header = get_gc_header(ptr);
header->flags &= ~IS_USED;
if (header->bucket < NUM_FREELIST_BUCKETS)
free_from_slab(header, true);
else
ralloc_free(header);
}
/* Find the gc_ctx that owns "ptr" (via the slab for small objects, via the
 * ralloc parent for large ones).
 */
gc_ctx *gc_get_context(void *ptr)
{
   gc_block_header *header = get_gc_header(ptr);

   if (header->bucket >= NUM_FREELIST_BUCKETS)
      return ralloc_parent(header);

   return get_gc_slab(header)->ctx;
}
/* Begin a mark-and-sweep cycle: flip the generation bit and move all of the
 * context's current allocations onto a temporary "rubbish" context.
 * gc_mark_live() rescues reachable objects; gc_sweep_end() frees the rest.
 */
void
gc_sweep_start(gc_ctx *ctx)
{
   ctx->current_gen ^= CURRENT_GENERATION;

   ctx->rubbish = ralloc_context(NULL);
   ralloc_adopt(ctx->rubbish, ctx);
}
/* Mark "mem" as reachable so the sweep in progress keeps it alive. */
void
gc_mark_live(gc_ctx *ctx, const void *mem)
{
   gc_block_header *header = get_gc_header(mem);
   if (header->bucket < NUM_FREELIST_BUCKETS)
      /* Slab object: toggle its generation bit to match the new current_gen. */
      header->flags ^= CURRENT_GENERATION;
   else
      /* Large block: steal it back from the rubbish context. */
      ralloc_steal(ctx, header);
}
/* Finish a sweep: free every slab object whose generation bit was not
 * toggled by gc_mark_live(), drop empty slabs, re-adopt the surviving slabs
 * from the rubbish context, and free the rubbish (which kills any unmarked
 * large blocks still parented to it).
 */
void
gc_sweep_end(gc_ctx *ctx)
{
   assert(ctx->rubbish);

   for (unsigned i = 0; i < NUM_FREELIST_BUCKETS; i++) {
      unsigned obj_size = gc_bucket_obj_size(i);
      list_for_each_entry_safe(gc_slab, slab, &ctx->slabs[i].slabs, link) {
         if (!slab->num_allocated) {
            free_slab(slab);
            continue;
         }

         /* Scan every object slot the slab's linear allocator has handed
          * out so far. */
         for (char *ptr = (char*)(slab + 1); ptr != slab->next_available; ptr += obj_size) {
            gc_block_header *header = (gc_block_header *)ptr;
            if (!(header->flags & IS_USED))
               continue;
            if ((header->flags & CURRENT_GENERATION) == ctx->current_gen)
               continue;

            /* If this is the slab's last live object, free_from_slab()
             * frees the slab itself, so stop iterating it. */
            bool last = slab->num_allocated == 1;

            header->flags &= ~IS_USED;
            free_from_slab(header, false);

            if (last)
               break;
         }
      }
   }

   for (unsigned i = 0; i < NUM_FREELIST_BUCKETS; i++) {
      list_for_each_entry(gc_slab, slab, &ctx->slabs[i].slabs, link) {
         assert(slab->num_allocated > 0); /* free_from_slab() should free it otherwise */
         ralloc_steal(ctx, slab);
      }
   }

   ralloc_free(ctx->rubbish);
   ctx->rubbish = NULL;
}
/***************************************************************************
* Linear allocator for short-lived allocations.
***************************************************************************
*
* The allocator consists of a parent node (2K buffer), which requires
* a ralloc parent, and child nodes (allocations). Child nodes can't be freed
* directly, because the parent doesn't track them. You have to release
* the parent node in order to release all its children.
*
* The allocator uses a fixed-sized buffer with a monotonically increasing
* offset after each allocation. If the buffer is all used, another buffer
* is allocated, using the linear parent node as ralloc parent.
*
* The linear parent node is always the first buffer and keeps track of all
* other buffers.
*/
#define SUBALLOC_ALIGNMENT 8
#define LMAGIC_CONTEXT 0x87b9c7d3
#define LMAGIC_NODE 0x87b910d3
/* Header of the linear allocator's parent node.  The first buffer lives in
 * the same ralloc block, immediately after this struct (plus the debug
 * canary); additional buffers are separate ralloc children of the context.
 */
struct linear_ctx {

   alignas(HEADER_ALIGN)

#ifndef NDEBUG
   unsigned magic;   /* for debugging */
#endif
   unsigned min_buffer_size;

   unsigned offset;     /* points to the first unused byte in the latest buffer */
   unsigned size;       /* size of the latest buffer */
   void *latest;        /* the only buffer that has free space */
};

typedef struct linear_ctx linear_ctx;
#ifndef NDEBUG
/* Debug-only guard placed immediately before each linear buffer's usable
 * bytes; tracks the buffer's own fill level for consistency asserts.
 */
struct linear_node_canary {
   alignas(HEADER_ALIGN)
   unsigned magic;
   unsigned offset;  /* points to the first unused byte in *this* buffer */
};

typedef struct linear_node_canary linear_node_canary;

/* Step back from a buffer pointer to the canary that precedes it. */
static linear_node_canary *
get_node_canary(void *ptr)
{
   return (void *)((char *)ptr - sizeof(linear_node_canary));
}
#endif
/* Size of the per-buffer canary; zero in release builds so buffers carry
 * no overhead.
 */
static unsigned
get_node_canary_size()
{
#ifndef NDEBUG
   return sizeof(linear_node_canary);
#else
   return 0;
#endif
}
/* Allocate "size" bytes from the linear context "ctx".  Children cannot be
 * freed individually — only by freeing the whole context.  Returns NULL on
 * allocation failure.
 *
 * Fix: removed a stray blame-viewer timestamp line that had been pasted
 * into the function body (it broke compilation); logic is unchanged.
 */
void *
linear_alloc_child(linear_ctx *ctx, unsigned size)
{
   assert(ctx->magic == LMAGIC_CONTEXT);
   assert(get_node_canary(ctx->latest)->magic == LMAGIC_NODE);
   assert(get_node_canary(ctx->latest)->offset == ctx->offset);

   size = ALIGN_POT(size, SUBALLOC_ALIGNMENT);

   if (unlikely(ctx->offset + size > ctx->size)) {
      /* allocate a new node */
      unsigned node_size = size;
      if (likely(node_size < ctx->min_buffer_size))
         node_size = ctx->min_buffer_size;

      const unsigned canary_size = get_node_canary_size();
      const unsigned full_size = canary_size + node_size;

      /* linear context is also a ralloc context */
      char *ptr = ralloc_size(ctx, full_size);
      if (unlikely(!ptr))
         return NULL;

#ifndef NDEBUG
      linear_node_canary *canary = (void *) ptr;
      canary->magic = LMAGIC_NODE;
      canary->offset = 0;
#endif

      /* If the new buffer is going to be full, don't update `latest`
       * pointer.  Either the current one is also full, so doesn't
       * matter, or the current one is not full, so there's still chance
       * to use that space.
       */
      if (unlikely(size == node_size)) {
#ifndef NDEBUG
         canary->offset = size;
#endif
         assert((uintptr_t)(ptr + canary_size) % SUBALLOC_ALIGNMENT == 0);
         return ptr + canary_size;
      }

      ctx->offset = 0;
      ctx->size = node_size;
      ctx->latest = ptr + canary_size;
   }

   void *ptr = (char *)ctx->latest + ctx->offset;
   ctx->offset += size;

#ifndef NDEBUG
   linear_node_canary *canary = get_node_canary(ctx->latest);
   canary->offset += size;
#endif

   assert((uintptr_t)ptr % SUBALLOC_ALIGNMENT == 0);
   return ptr;
}
/* Create a linear context with default options (a zero min_buffer_size
 * makes linear_context_with_opts() use its built-in default).
 */
linear_ctx *
linear_context(void *ralloc_ctx)
{
   const linear_opts default_opts = {0};
   return linear_context_with_opts(ralloc_ctx, &default_opts);
}
/* Create a linear context as a ralloc child of "ralloc_ctx" (required to be
 * non-NULL).  The context header and its first buffer share one ralloc
 * block.  opts->min_buffer_size is rounded up to a multiple of the
 * 2048-byte default, with 2048 as the floor.
 */
linear_ctx *
linear_context_with_opts(void *ralloc_ctx, const linear_opts *opts)
{
   linear_ctx *ctx;

   if (unlikely(!ralloc_ctx))
      return NULL;

   const unsigned default_min_buffer_size = 2048;
   const unsigned min_buffer_size =
      MAX2(ALIGN_POT(opts->min_buffer_size, default_min_buffer_size),
           default_min_buffer_size);

   const unsigned size = min_buffer_size;
   const unsigned canary_size = get_node_canary_size();
   const unsigned full_size =
      sizeof(linear_ctx) + canary_size + size;

   ctx = ralloc_size(ralloc_ctx, full_size);
   if (unlikely(!ctx))
      return NULL;

   ctx->min_buffer_size = min_buffer_size;
   ctx->offset = 0;
   ctx->size = size;
   ctx->latest = (char *)&ctx[1] + canary_size;

#ifndef NDEBUG
   ctx->magic = LMAGIC_CONTEXT;
   linear_node_canary *canary = get_node_canary(ctx->latest);
   canary->magic = LMAGIC_NODE;
   canary->offset = 0;
#endif

   return ctx;
}
/* Zero-initialized variant of linear_alloc_child(). */
void *
linear_zalloc_child(linear_ctx *ctx, unsigned size)
{
   void *mem = linear_alloc_child(ctx, size);

   if (unlikely(mem == NULL))
      return NULL;

   return memset(mem, 0, size);
}
/* Free the linear context and, because it is the ralloc parent of all its
 * extra buffer nodes, every allocation made from it.  NULL is a no-op.
 */
void
linear_free_context(linear_ctx *ctx)
{
   if (unlikely(ctx == NULL))
      return;

   assert(ctx->magic == LMAGIC_CONTEXT);

   ralloc_free(ctx);
}
/* Re-parent a linear context (and, through ralloc parentage, all of its
 * buffers) under "new_ralloc_ctx".  NULL ctx is a no-op.
 */
void
ralloc_steal_linear_context(void *new_ralloc_ctx, linear_ctx *ctx)
{
   if (unlikely(ctx == NULL))
      return;

   assert(ctx->magic == LMAGIC_CONTEXT);

   ralloc_steal(new_ralloc_ctx, ctx);
}
/* Return the ralloc parent of a linear context, or NULL when it has none
 * (e.g. after ralloc_steal_linear_context(NULL, ctx)).
 */
void *
ralloc_parent_of_linear_context(linear_ctx *ctx)
{
   assert(ctx->magic == LMAGIC_CONTEXT);

   ralloc_header *parent = get_header(ctx)->parent;

   /* Fix: the previous code applied PTR_FROM_HEADER to a NULL parent and
    * returned a bogus non-NULL pointer; return NULL instead, matching
    * ralloc_parent(). */
   return parent != NULL ? PTR_FROM_HEADER(parent) : NULL;
}
/* All code below is pretty much copied from ralloc and only the alloc
* calls are different.
*/
/* Duplicate the NUL-terminated "str" into the linear context.  Returns NULL
 * for a NULL input or on allocation failure.
 */
char *
linear_strdup(linear_ctx *ctx, const char *str)
{
   if (unlikely(str == NULL))
      return NULL;

   const unsigned len = strlen(str);
   char *copy = linear_alloc_child(ctx, len + 1);
   if (unlikely(copy == NULL))
      return NULL;

   memcpy(copy, str, len);
   copy[len] = '\0';
   return copy;
}
/* printf-style allocation from a linear context; NULL on failure. */
char *
linear_asprintf(linear_ctx *ctx, const char *fmt, ...)
{
   char *ptr;
   va_list args;
   va_start(args, fmt);
   ptr = linear_vasprintf(ctx, fmt, args);
   va_end(args);
   return ptr;
}
/* va_list variant of linear_asprintf(); NULL on allocation failure. */
char *
linear_vasprintf(linear_ctx *ctx, const char *fmt, va_list args)
{
   const unsigned size = u_printf_length(fmt, args) + 1;

   char *buf = linear_alloc_child(ctx, size);
   if (buf != NULL)
      vsnprintf(buf, size, fmt, args);

   return buf;
}
/* Append formatted output to *str (which may be NULL); the new string is a
 * fresh linear child.  Returns false on allocation failure.
 */
bool
linear_asprintf_append(linear_ctx *ctx, char **str, const char *fmt, ...)
{
   bool success;
   va_list args;
   va_start(args, fmt);
   success = linear_vasprintf_append(ctx, str, fmt, args);
   va_end(args);
   return success;
}
/* va_list variant of linear_asprintf_append(): appends at the current end
 * of *str, treating NULL as an empty string.
 */
bool
linear_vasprintf_append(linear_ctx *ctx, char **str, const char *fmt, va_list args)
{
   assert(str != NULL);

   size_t len = (*str != NULL) ? strlen(*str) : 0;
   return linear_vasprintf_rewrite_tail(ctx, str, &len, fmt, args);
}
/* Overwrite *str from offset *start with formatted output (the result is a
 * fresh linear child); *start is advanced to the new end of the string.
 */
bool
linear_asprintf_rewrite_tail(linear_ctx *ctx, char **str, size_t *start,
                             const char *fmt, ...)
{
   bool success;
   va_list args;
   va_start(args, fmt);
   success = linear_vasprintf_rewrite_tail(ctx, str, start, fmt, args);
   va_end(args);
   return success;
}
/* va_list variant of linear_asprintf_rewrite_tail().  Because linear
 * children cannot grow, a new child is allocated and the retained prefix is
 * copied into it.  Returns false on allocation failure, leaving *str and
 * *start untouched.
 */
bool
linear_vasprintf_rewrite_tail(linear_ctx *ctx, char **str, size_t *start,
                              const char *fmt, va_list args)
{
   size_t new_length;
   char *ptr;

   assert(str != NULL);

   if (unlikely(*str == NULL)) {
      *str = linear_vasprintf(ctx, fmt, args);
      /* Fix: linear_vasprintf() can fail; report failure instead of
       * calling strlen(NULL), matching ralloc_vasprintf_rewrite_tail(). */
      if (unlikely(*str == NULL))
         return false;
      *start = strlen(*str);
      return true;
   }

   new_length = u_printf_length(fmt, args);
   ptr = linear_alloc_child(ctx, *start + new_length + 1);
   if (unlikely(ptr == NULL))
      return false;

   memcpy(ptr, *str, *start);

   vsnprintf(ptr + *start, new_length + 1, fmt, args);
   *str = ptr;
   *start += new_length;
   return true;
}
/* helper routine for strcat/strncat - n is the exact amount to copy */
static bool
linear_cat(linear_ctx *ctx, char **dest, const char *str, unsigned n)
{
   assert(dest != NULL && *dest != NULL);

   const unsigned old_len = strlen(*dest);
   char *combined = linear_alloc_child(ctx, old_len + n + 1);
   if (unlikely(combined == NULL))
      return false;

   /* Linear children can't be resized, so build a fresh string holding
    * both parts. */
   memcpy(combined, *dest, old_len);
   memcpy(combined + old_len, str, n);
   combined[old_len + n] = '\0';

   *dest = combined;
   return true;
}
/* Append the NUL-terminated "str" to *dest (the result is a fresh linear
 * child).  Returns false on allocation failure.
 */
bool
linear_strcat(linear_ctx *ctx, char **dest, const char *str)
{
   return linear_cat(ctx, dest, str, strlen(str));
}
/* Array variant of linear_alloc_child() with overflow checking.
 * NOTE(review): the product is truncated to unsigned by
 * linear_alloc_child()'s parameter type — preexisting behavior.
 */
void *
linear_alloc_child_array(linear_ctx *ctx, size_t size, unsigned count)
{
   /* Fix: SIZE_MAX/size is undefined behavior for size == 0 (and a zero
    * element size cannot overflow anyway), so guard the check itself. */
   if (size != 0 && count > SIZE_MAX/size)
      return NULL;

   return linear_alloc_child(ctx, size * count);
}
/* Zeroed variant of linear_alloc_child_array(); same overflow checking. */
void *
linear_zalloc_child_array(linear_ctx *ctx, size_t size, unsigned count)
{
   /* Fix: SIZE_MAX/size is undefined behavior for size == 0 (and a zero
    * element size cannot overflow anyway), so guard the check itself. */
   if (size != 0 && count > SIZE_MAX/size)
      return NULL;

   return linear_zalloc_child(ctx, size * count);
}
/* Accumulator threaded through ralloc_print_info_helper(). */
typedef struct {
   /* Output stream; NULL means gather the counters without printing. */
   FILE *f;
   unsigned indent;

   unsigned ralloc_count;
   unsigned linear_count;
   unsigned gc_count;

   /* These don't include padding or metadata from suballocators. */
   unsigned content_bytes;
   unsigned ralloc_metadata_bytes;
   unsigned linear_metadata_bytes;
   unsigned gc_metadata_bytes;

   /* Presumably set while descending into a linear/GC subtree — the helper
    * body continues past this chunk, so confirm against the full source. */
   bool inside_linear;
   bool inside_gc;
} ralloc_print_info_state;
/* Recursively walk the ralloc tree rooted at "info", printing one line per
 * allocation to state->f (when non-NULL) and accumulating totals in *state.
 *
 * In debug builds each node is validated against the ralloc canary and its
 * contents are probed for the linear/gc magic values, so allocations can be
 * attributed to the right suballocator and byte totals gathered.  In release
 * builds only the header pointers and the allocation count are available.
 */
static void
ralloc_print_info_helper(ralloc_print_info_state *state, const ralloc_header *info)
{
   FILE *f = state->f;

   if (f) {
      for (unsigned i = 0; i < state->indent; i++) fputc(' ', f);
      fprintf(f, "%p", info);
   }

   /* TODO: Account for padding used in various places. */

#ifndef NDEBUG
   assert(info->canary == CANARY);
   if (f) fprintf(f, " (%d bytes)", info->size);
   state->content_bytes += info->size;
   state->ralloc_metadata_bytes += sizeof(ralloc_header);

   /* Probe the allocation's contents for the magic values that identify a
    * linear or gc context.  The size comparison guards against reading past
    * the end of an allocation smaller than the context struct.
    */
   const void *ptr = PTR_FROM_HEADER(info);
   const linear_ctx *lin_ctx = ptr;
   const gc_ctx *gc_ctx = ptr;

   const bool is_linear = info->size >= sizeof(*lin_ctx) &&
                          lin_ctx->magic == LMAGIC_CONTEXT;
   const bool is_gc = info->size >= sizeof(*gc_ctx) &&
                      gc_ctx->canary == GC_CONTEXT_CANARY;

   if (is_linear) {
      if (f) fprintf(f, " (linear context)");
      /* Nested suballocator contexts are not expected in a valid tree. */
      assert(!state->inside_gc && !state->inside_linear);
      state->inside_linear = true;
      /* The context header counts as metadata, not user content. */
      state->linear_metadata_bytes += sizeof(linear_ctx);
      state->content_bytes -= sizeof(linear_ctx);
      state->linear_count++;
   } else if (is_gc) {
      if (f) fprintf(f, " (gc context)");
      assert(!state->inside_gc && !state->inside_linear);
      state->inside_gc = true;
      state->gc_metadata_bytes += sizeof(gc_block_header);
   } else if (state->inside_linear) {
      /* Children of a linear context are its node buffers. */
      const linear_node_canary *lin_node = ptr;
      if (lin_node->magic == LMAGIC_NODE) {
         if (f) fprintf(f, " (linear node buffer)");
         state->content_bytes -= sizeof(linear_node_canary);
         state->linear_metadata_bytes += sizeof(linear_node_canary);
         state->linear_count++;
      }
   } else if (state->inside_gc) {
      if (f) fprintf(f, " (gc slab or large block)");
      state->gc_count++;
   }
#endif

   state->ralloc_count++;
   if (f) fprintf(f, "\n");

   /* Recurse into the children, indented two extra columns. */
   const ralloc_header *c = info->child;
   state->indent += 2;
   while (c != NULL) {
      ralloc_print_info_helper(state, c);
      c = c->next;
   }
   state->indent -= 2;

#ifndef NDEBUG
   /* Leaving the subtree rooted at a context: clear the attribution flag. */
   if (is_linear) state->inside_linear = false;
   else if (is_gc) state->inside_gc = false;
#endif
}
/* Print information about the ralloc tree containing the ralloc'd pointer
 * "p" to stream "f": a per-allocation dump of the tree followed by a
 * summary of the accumulated totals.
 *
 * flags: RALLOC_PRINT_INFO_SUMMARY_ONLY suppresses the tree dump and
 * prints only the summary.
 *
 * In release builds (NDEBUG) only the allocation count is gathered, so the
 * byte-totals section is omitted (content_bytes stays 0).
 */
void
ralloc_print_info(FILE *f, const void *p, unsigned flags)
{
   ralloc_print_info_state state = {
      /* A NULL stream makes the helper accumulate counters without
       * printing the tree.  Test the flag bit as a mask (not "== 1") so
       * this keeps working if the flag's value ever changes.
       */
      .f = (flags & RALLOC_PRINT_INFO_SUMMARY_ONLY) != 0 ? NULL : f,
   };

   const ralloc_header *info = get_header(p);
   ralloc_print_info_helper(&state, info);

   /* The counters are unsigned, so use %u to match. */
   fprintf(f, "==== RALLOC INFO ptr=%p info=%p\n"
              "ralloc allocations = %u\n"
              " - linear = %u\n"
              " - gc = %u\n"
              " - other = %u\n",
           p, info,
           state.ralloc_count,
           state.linear_count,
           state.gc_count,
           state.ralloc_count - state.linear_count - state.gc_count);

   /* Byte totals are only gathered in debug builds; skip when empty. */
   if (state.content_bytes) {
      fprintf(f,
              "content bytes = %u\n"
              "ralloc metadata bytes = %u\n"
              "linear metadata bytes = %u\n",
              state.content_bytes,
              state.ralloc_metadata_bytes,
              state.linear_metadata_bytes);
   }

   fprintf(f, "====\n");
}