mesa/src/util/ralloc.c
Jonas Pfeil fbaf5955ee ralloc: Make sure ralloc() allocations match malloc()'s alignment.
The header of ralloc needs to be aligned, because the compiler assumes
that malloc returns will be aligned to 8/16 bytes depending on the
platform, leading to degraded performance or alignment faults with ralloc.

Fixes SIGBUS on Raspberry Pi at high optimization levels.

This patch is not perfect for MSVC, as maybe in the future the alignment
for the most demanding data type might change to more than 8.

v2: Commit message reword/typo fix, and add a bigger explanation in the
    code (by anholt)

Signed-off-by: Jonas Pfeil <pfeiljonas@gmx.de>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Cc: mesa-stable@lists.freedesktop.org
(cherry picked from commit cd2b55e536)

Squashed with

ralloc: don't leave out the alignment factor

Experimentation shows that without alignment factor gcc and clang choose
a factor of 16 even on IA-32, which doesn't match what malloc() uses (8).
The problem is it makes gcc assume the pointer is 16 byte aligned, so
with -O3 it starts using aligned SSE instructions that later fault,
so always specify a suitable alignment factor.

Cc: Jonas Pfeil <pfeiljonas@gmx.de>
Fixes: cd2b55e5 "ralloc: Make sure ralloc() allocations match malloc()'s alignment."
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100049
Signed-off-by: Grazvydas Ignotas <notasas@gmail.com>
Tested by: Mike Lothian <mike@fireburn.co.uk>
Tested by: Jonas Pfeil <pfeiljonas@gmx.de>

(cherry picked from commit ff494fe999)
2017-03-16 01:33:52 +00:00

896 lines
20 KiB
C

/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
/* Android defines SIZE_MAX in limits.h, instead of the standard stdint.h */
#ifdef ANDROID
#include <limits.h>
#endif
/* Some versions of MinGW are missing _vscprintf's declaration, although they
* still provide the symbol in the import library. */
#ifdef __MINGW32__
_CRTIMP int _vscprintf(const char *format, va_list argptr);
#endif
#include "ralloc.h"
#ifndef va_copy
#ifdef __va_copy
#define va_copy(dest, src) __va_copy((dest), (src))
#else
#define va_copy(dest, src) (dest) = (src)
#endif
#endif
#define CANARY 0x5A1106
/* Align the header's size so that ralloc() allocations will return with the
* same alignment as a libc malloc would have (8 on 32-bit GLIBC, 16 on
* 64-bit), avoiding performance penalities on x86 and alignment faults on
* ARM.
*/
struct
#ifdef _MSC_VER
__declspec(align(8))
#elif defined(__LP64__)
__attribute__((aligned(16)))
#else
__attribute__((aligned(8)))
#endif
ralloc_header
{
#ifdef DEBUG
/* A canary value used to determine whether a pointer is ralloc'd. */
unsigned canary;
#endif
struct ralloc_header *parent;
/* The first child (head of a linked list) */
struct ralloc_header *child;
/* Linked list of siblings */
struct ralloc_header *prev;
struct ralloc_header *next;
void (*destructor)(void *);
};
typedef struct ralloc_header ralloc_header;
static void unlink_block(ralloc_header *info);
static void unsafe_free(ralloc_header *info);
static ralloc_header *
get_header(const void *ptr)
{
ralloc_header *info = (ralloc_header *) (((char *) ptr) -
sizeof(ralloc_header));
#ifdef DEBUG
assert(info->canary == CANARY);
#endif
return info;
}
#define PTR_FROM_HEADER(info) (((char *) info) + sizeof(ralloc_header))
static void
add_child(ralloc_header *parent, ralloc_header *info)
{
if (parent != NULL) {
info->parent = parent;
info->next = parent->child;
parent->child = info;
if (info->next != NULL)
info->next->prev = info;
}
}
void *
ralloc_context(const void *ctx)
{
return ralloc_size(ctx, 0);
}
void *
ralloc_size(const void *ctx, size_t size)
{
void *block = malloc(size + sizeof(ralloc_header));
ralloc_header *info;
ralloc_header *parent;
if (unlikely(block == NULL))
return NULL;
info = (ralloc_header *) block;
/* measurements have shown that calloc is slower (because of
* the multiplication overflow checking?), so clear things
* manually
*/
info->parent = NULL;
info->child = NULL;
info->prev = NULL;
info->next = NULL;
info->destructor = NULL;
parent = ctx != NULL ? get_header(ctx) : NULL;
add_child(parent, info);
#ifdef DEBUG
info->canary = CANARY;
#endif
return PTR_FROM_HEADER(info);
}
void *
rzalloc_size(const void *ctx, size_t size)
{
void *ptr = ralloc_size(ctx, size);
if (likely(ptr))
memset(ptr, 0, size);
return ptr;
}
/* helper function - assumes ptr != NULL */
static void *
resize(void *ptr, size_t size)
{
ralloc_header *child, *old, *info;
old = get_header(ptr);
info = realloc(old, size + sizeof(ralloc_header));
if (info == NULL)
return NULL;
/* Update parent and sibling's links to the reallocated node. */
if (info != old && info->parent != NULL) {
if (info->parent->child == old)
info->parent->child = info;
if (info->prev != NULL)
info->prev->next = info;
if (info->next != NULL)
info->next->prev = info;
}
/* Update child->parent links for all children */
for (child = info->child; child != NULL; child = child->next)
child->parent = info;
return PTR_FROM_HEADER(info);
}
void *
reralloc_size(const void *ctx, void *ptr, size_t size)
{
if (unlikely(ptr == NULL))
return ralloc_size(ctx, size);
assert(ralloc_parent(ptr) == ctx);
return resize(ptr, size);
}
void *
ralloc_array_size(const void *ctx, size_t size, unsigned count)
{
if (count > SIZE_MAX/size)
return NULL;
return ralloc_size(ctx, size * count);
}
void *
rzalloc_array_size(const void *ctx, size_t size, unsigned count)
{
if (count > SIZE_MAX/size)
return NULL;
return rzalloc_size(ctx, size * count);
}
void *
reralloc_array_size(const void *ctx, void *ptr, size_t size, unsigned count)
{
if (count > SIZE_MAX/size)
return NULL;
return reralloc_size(ctx, ptr, size * count);
}
void
ralloc_free(void *ptr)
{
ralloc_header *info;
if (ptr == NULL)
return;
info = get_header(ptr);
unlink_block(info);
unsafe_free(info);
}
static void
unlink_block(ralloc_header *info)
{
/* Unlink from parent & siblings */
if (info->parent != NULL) {
if (info->parent->child == info)
info->parent->child = info->next;
if (info->prev != NULL)
info->prev->next = info->next;
if (info->next != NULL)
info->next->prev = info->prev;
}
info->parent = NULL;
info->prev = NULL;
info->next = NULL;
}
static void
unsafe_free(ralloc_header *info)
{
/* Recursively free any children...don't waste time unlinking them. */
ralloc_header *temp;
while (info->child != NULL) {
temp = info->child;
info->child = temp->next;
unsafe_free(temp);
}
/* Free the block itself. Call the destructor first, if any. */
if (info->destructor != NULL)
info->destructor(PTR_FROM_HEADER(info));
free(info);
}
void
ralloc_steal(const void *new_ctx, void *ptr)
{
ralloc_header *info, *parent;
if (unlikely(ptr == NULL))
return;
info = get_header(ptr);
parent = get_header(new_ctx);
unlink_block(info);
add_child(parent, info);
}
void
ralloc_adopt(const void *new_ctx, void *old_ctx)
{
ralloc_header *new_info, *old_info, *child;
if (unlikely(old_ctx == NULL))
return;
old_info = get_header(old_ctx);
new_info = get_header(new_ctx);
/* If there are no children, bail. */
if (unlikely(old_info->child == NULL))
return;
/* Set all the children's parent to new_ctx; get a pointer to the last child. */
for (child = old_info->child; child->next != NULL; child = child->next) {
child->parent = new_info;
}
/* Connect the two lists together; parent them to new_ctx; make old_ctx empty. */
child->next = new_info->child;
child->parent = new_info;
new_info->child = old_info->child;
old_info->child = NULL;
}
void *
ralloc_parent(const void *ptr)
{
ralloc_header *info;
if (unlikely(ptr == NULL))
return NULL;
info = get_header(ptr);
return info->parent ? PTR_FROM_HEADER(info->parent) : NULL;
}
static void *autofree_context = NULL;
static void
autofree(void)
{
ralloc_free(autofree_context);
}
void *
ralloc_autofree_context(void)
{
if (unlikely(autofree_context == NULL)) {
autofree_context = ralloc_context(NULL);
atexit(autofree);
}
return autofree_context;
}
void
ralloc_set_destructor(const void *ptr, void(*destructor)(void *))
{
ralloc_header *info = get_header(ptr);
info->destructor = destructor;
}
char *
ralloc_strdup(const void *ctx, const char *str)
{
size_t n;
char *ptr;
if (unlikely(str == NULL))
return NULL;
n = strlen(str);
ptr = ralloc_array(ctx, char, n + 1);
memcpy(ptr, str, n);
ptr[n] = '\0';
return ptr;
}
char *
ralloc_strndup(const void *ctx, const char *str, size_t max)
{
size_t n;
char *ptr;
if (unlikely(str == NULL))
return NULL;
n = strnlen(str, max);
ptr = ralloc_array(ctx, char, n + 1);
memcpy(ptr, str, n);
ptr[n] = '\0';
return ptr;
}
/* helper routine for strcat/strncat - n is the exact amount to copy */
static bool
cat(char **dest, const char *str, size_t n)
{
char *both;
size_t existing_length;
assert(dest != NULL && *dest != NULL);
existing_length = strlen(*dest);
both = resize(*dest, existing_length + n + 1);
if (unlikely(both == NULL))
return false;
memcpy(both + existing_length, str, n);
both[existing_length + n] = '\0';
*dest = both;
return true;
}
bool
ralloc_strcat(char **dest, const char *str)
{
return cat(dest, str, strlen(str));
}
bool
ralloc_strncat(char **dest, const char *str, size_t n)
{
/* Clamp n to the string length */
size_t str_length = strlen(str);
if (str_length < n)
n = str_length;
return cat(dest, str, n);
}
char *
ralloc_asprintf(const void *ctx, const char *fmt, ...)
{
char *ptr;
va_list args;
va_start(args, fmt);
ptr = ralloc_vasprintf(ctx, fmt, args);
va_end(args);
return ptr;
}
/* Return the length of the string that would be generated by a printf-style
* format and argument list, not including the \0 byte.
*/
static size_t
printf_length(const char *fmt, va_list untouched_args)
{
int size;
char junk;
/* Make a copy of the va_list so the original caller can still use it */
va_list args;
va_copy(args, untouched_args);
#ifdef _WIN32
/* We need to use _vcsprintf to calculate the size as vsnprintf returns -1
* if the number of characters to write is greater than count.
*/
size = _vscprintf(fmt, args);
(void)junk;
#else
size = vsnprintf(&junk, 1, fmt, args);
#endif
assert(size >= 0);
va_end(args);
return size;
}
char *
ralloc_vasprintf(const void *ctx, const char *fmt, va_list args)
{
size_t size = printf_length(fmt, args) + 1;
char *ptr = ralloc_size(ctx, size);
if (ptr != NULL)
vsnprintf(ptr, size, fmt, args);
return ptr;
}
bool
ralloc_asprintf_append(char **str, const char *fmt, ...)
{
bool success;
va_list args;
va_start(args, fmt);
success = ralloc_vasprintf_append(str, fmt, args);
va_end(args);
return success;
}
bool
ralloc_vasprintf_append(char **str, const char *fmt, va_list args)
{
size_t existing_length;
assert(str != NULL);
existing_length = *str ? strlen(*str) : 0;
return ralloc_vasprintf_rewrite_tail(str, &existing_length, fmt, args);
}
bool
ralloc_asprintf_rewrite_tail(char **str, size_t *start, const char *fmt, ...)
{
bool success;
va_list args;
va_start(args, fmt);
success = ralloc_vasprintf_rewrite_tail(str, start, fmt, args);
va_end(args);
return success;
}
bool
ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt,
va_list args)
{
size_t new_length;
char *ptr;
assert(str != NULL);
if (unlikely(*str == NULL)) {
// Assuming a NULL context is probably bad, but it's expected behavior.
*str = ralloc_vasprintf(NULL, fmt, args);
*start = strlen(*str);
return true;
}
new_length = printf_length(fmt, args);
ptr = resize(*str, *start + new_length + 1);
if (unlikely(ptr == NULL))
return false;
vsnprintf(ptr + *start, new_length + 1, fmt, args);
*str = ptr;
*start += new_length;
return true;
}
/***************************************************************************
* Linear allocator for short-lived allocations.
***************************************************************************
*
* The allocator consists of a parent node (2K buffer), which requires
* a ralloc parent, and child nodes (allocations). Child nodes can't be freed
* directly, because the parent doesn't track them. You have to release
* the parent node in order to release all its children.
*
* The allocator uses a fixed-sized buffer with a monotonically increasing
* offset after each allocation. If the buffer is all used, another buffer
* is allocated, sharing the same ralloc parent, so all buffers are at
* the same level in the ralloc hierarchy.
*
* The linear parent node is always the first buffer and keeps track of all
* other buffers.
*/
#define ALIGN_POT(x, y) (((x) + (y) - 1) & ~((y) - 1))
#define MIN_LINEAR_BUFSIZE 2048
#define SUBALLOC_ALIGNMENT sizeof(uintptr_t)
#define LMAGIC 0x87b9c7d3
struct linear_header {
#ifdef DEBUG
unsigned magic; /* for debugging */
#endif
unsigned offset; /* points to the first unused byte in the buffer */
unsigned size; /* size of the buffer */
void *ralloc_parent; /* new buffers will use this */
struct linear_header *next; /* next buffer if we have more */
struct linear_header *latest; /* the only buffer that has free space */
/* After this structure, the buffer begins.
* Each suballocation consists of linear_size_chunk as its header followed
* by the suballocation, so it goes:
*
* - linear_size_chunk
* - allocated space
* - linear_size_chunk
* - allocated space
* etc.
*
* linear_size_chunk is only needed by linear_realloc.
*/
};
struct linear_size_chunk {
unsigned size; /* for realloc */
unsigned _padding;
};
typedef struct linear_header linear_header;
typedef struct linear_size_chunk linear_size_chunk;
#define LINEAR_PARENT_TO_HEADER(parent) \
(linear_header*) \
((char*)(parent) - sizeof(linear_size_chunk) - sizeof(linear_header))
/* Allocate the linear buffer with its header. */
static linear_header *
create_linear_node(void *ralloc_ctx, unsigned min_size)
{
linear_header *node;
min_size += sizeof(linear_size_chunk);
if (likely(min_size < MIN_LINEAR_BUFSIZE))
min_size = MIN_LINEAR_BUFSIZE;
node = ralloc_size(ralloc_ctx, sizeof(linear_header) + min_size);
if (unlikely(!node))
return NULL;
#ifdef DEBUG
node->magic = LMAGIC;
#endif
node->offset = 0;
node->size = min_size;
node->ralloc_parent = ralloc_ctx;
node->next = NULL;
node->latest = node;
return node;
}
void *
linear_alloc_child(void *parent, unsigned size)
{
linear_header *first = LINEAR_PARENT_TO_HEADER(parent);
linear_header *latest = first->latest;
linear_header *new_node;
linear_size_chunk *ptr;
unsigned full_size;
assert(first->magic == LMAGIC);
assert(!latest->next);
size = ALIGN_POT(size, SUBALLOC_ALIGNMENT);
full_size = sizeof(linear_size_chunk) + size;
if (unlikely(latest->offset + full_size > latest->size)) {
/* allocate a new node */
new_node = create_linear_node(latest->ralloc_parent, size);
if (unlikely(!new_node))
return NULL;
first->latest = new_node;
latest->latest = new_node;
latest->next = new_node;
latest = new_node;
}
ptr = (linear_size_chunk *)((char*)&latest[1] + latest->offset);
ptr->size = size;
latest->offset += full_size;
return &ptr[1];
}
void *
linear_alloc_parent(void *ralloc_ctx, unsigned size)
{
linear_header *node;
if (unlikely(!ralloc_ctx))
return NULL;
size = ALIGN_POT(size, SUBALLOC_ALIGNMENT);
node = create_linear_node(ralloc_ctx, size);
if (unlikely(!node))
return NULL;
return linear_alloc_child((char*)node +
sizeof(linear_header) +
sizeof(linear_size_chunk), size);
}
void *
linear_zalloc_child(void *parent, unsigned size)
{
void *ptr = linear_alloc_child(parent, size);
if (likely(ptr))
memset(ptr, 0, size);
return ptr;
}
void *
linear_zalloc_parent(void *parent, unsigned size)
{
void *ptr = linear_alloc_parent(parent, size);
if (likely(ptr))
memset(ptr, 0, size);
return ptr;
}
void
linear_free_parent(void *ptr)
{
linear_header *node;
if (unlikely(!ptr))
return;
node = LINEAR_PARENT_TO_HEADER(ptr);
assert(node->magic == LMAGIC);
while (node) {
void *ptr = node;
node = node->next;
ralloc_free(ptr);
}
}
void
ralloc_steal_linear_parent(void *new_ralloc_ctx, void *ptr)
{
linear_header *node;
if (unlikely(!ptr))
return;
node = LINEAR_PARENT_TO_HEADER(ptr);
assert(node->magic == LMAGIC);
while (node) {
ralloc_steal(new_ralloc_ctx, node);
node->ralloc_parent = new_ralloc_ctx;
node = node->next;
}
}
void *
ralloc_parent_of_linear_parent(void *ptr)
{
linear_header *node = LINEAR_PARENT_TO_HEADER(ptr);
assert(node->magic == LMAGIC);
return node->ralloc_parent;
}
void *
linear_realloc(void *parent, void *old, unsigned new_size)
{
unsigned old_size = 0;
ralloc_header *new_ptr;
new_ptr = linear_alloc_child(parent, new_size);
if (unlikely(!old))
return new_ptr;
old_size = ((linear_size_chunk*)old)[-1].size;
if (likely(new_ptr && old_size))
memcpy(new_ptr, old, MIN2(old_size, new_size));
return new_ptr;
}
/* All code below is pretty much copied from ralloc and only the alloc
* calls are different.
*/
char *
linear_strdup(void *parent, const char *str)
{
unsigned n;
char *ptr;
if (unlikely(!str))
return NULL;
n = strlen(str);
ptr = linear_alloc_child(parent, n + 1);
if (unlikely(!ptr))
return NULL;
memcpy(ptr, str, n);
ptr[n] = '\0';
return ptr;
}
char *
linear_asprintf(void *parent, const char *fmt, ...)
{
char *ptr;
va_list args;
va_start(args, fmt);
ptr = linear_vasprintf(parent, fmt, args);
va_end(args);
return ptr;
}
char *
linear_vasprintf(void *parent, const char *fmt, va_list args)
{
unsigned size = printf_length(fmt, args) + 1;
char *ptr = linear_alloc_child(parent, size);
if (ptr != NULL)
vsnprintf(ptr, size, fmt, args);
return ptr;
}
bool
linear_asprintf_append(void *parent, char **str, const char *fmt, ...)
{
bool success;
va_list args;
va_start(args, fmt);
success = linear_vasprintf_append(parent, str, fmt, args);
va_end(args);
return success;
}
bool
linear_vasprintf_append(void *parent, char **str, const char *fmt, va_list args)
{
size_t existing_length;
assert(str != NULL);
existing_length = *str ? strlen(*str) : 0;
return linear_vasprintf_rewrite_tail(parent, str, &existing_length, fmt, args);
}
bool
linear_asprintf_rewrite_tail(void *parent, char **str, size_t *start,
const char *fmt, ...)
{
bool success;
va_list args;
va_start(args, fmt);
success = linear_vasprintf_rewrite_tail(parent, str, start, fmt, args);
va_end(args);
return success;
}
bool
linear_vasprintf_rewrite_tail(void *parent, char **str, size_t *start,
const char *fmt, va_list args)
{
size_t new_length;
char *ptr;
assert(str != NULL);
if (unlikely(*str == NULL)) {
*str = linear_vasprintf(parent, fmt, args);
*start = strlen(*str);
return true;
}
new_length = printf_length(fmt, args);
ptr = linear_realloc(parent, *str, *start + new_length + 1);
if (unlikely(ptr == NULL))
return false;
vsnprintf(ptr + *start, new_length + 1, fmt, args);
*str = ptr;
*start += new_length;
return true;
}
/* helper routine for strcat/strncat - n is the exact amount to copy */
static bool
linear_cat(void *parent, char **dest, const char *str, unsigned n)
{
char *both;
unsigned existing_length;
assert(dest != NULL && *dest != NULL);
existing_length = strlen(*dest);
both = linear_realloc(parent, *dest, existing_length + n + 1);
if (unlikely(both == NULL))
return false;
memcpy(both + existing_length, str, n);
both[existing_length + n] = '\0';
*dest = both;
return true;
}
bool
linear_strcat(void *parent, char **dest, const char *str)
{
return linear_cat(parent, dest, str, strlen(str));
}