radv/amdgpu: Add winsys implementation of virtual buffers.

v2: - Added comments.
    - Fixed a double unmap bug.
    - Actually unmap the non-edge old ranges.

Signed-off-by: Bas Nieuwenhuizen <basni@google.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Author: Bas Nieuwenhuizen, 2017-02-04 01:18:05 +01:00
commit 715df30a4e (parent 78ee8b3f84)
4 changed files with 350 additions and 27 deletions

src/amd/vulkan/radv_radeon_winsys.h

@@ -47,6 +47,7 @@ enum radeon_bo_flag { /* bitfield */
 	RADEON_FLAG_GTT_WC = (1 << 0),
 	RADEON_FLAG_CPU_ACCESS = (1 << 1),
 	RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
+	RADEON_FLAG_VIRTUAL = (1 << 3)
 };
 
 enum radeon_bo_usage { /* bitfield */
@@ -289,6 +290,10 @@ struct radeon_winsys {
 	void (*buffer_set_metadata)(struct radeon_winsys_bo *bo,
 				    struct radeon_bo_metadata *md);
 
+	void (*buffer_virtual_bind)(struct radeon_winsys_bo *parent,
+				    uint64_t offset, uint64_t size,
+				    struct radeon_winsys_bo *bo, uint64_t bo_offset);
+
 	struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws);
 	void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
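To make the new entry point concrete, here is a hypothetical usage sketch (not part of this commit; it assumes the winsys's existing buffer_create signature, and uses a 64 KiB page size purely for illustration):

	/* Reserve a 4-page virtual BO, back pages 1-2 with a physical BO,
	 * then unbind page 1 again by binding NULL over it. */
	struct radeon_winsys_bo *virt =
		ws->buffer_create(ws, 4 * 65536, 65536, RADEON_DOMAIN_VRAM,
				  RADEON_FLAG_VIRTUAL);
	struct radeon_winsys_bo *mem =
		ws->buffer_create(ws, 2 * 65536, 65536, RADEON_DOMAIN_VRAM, 0);

	ws->buffer_virtual_bind(virt, 1 * 65536, 2 * 65536, mem, 0);
	ws->buffer_virtual_bind(virt, 1 * 65536, 1 * 65536, NULL, 0);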

src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c

@@ -34,19 +34,201 @@
 #include <amdgpu_drm.h>
 #include <inttypes.h>
 
+static void
+radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
+			       const struct radv_amdgpu_map_range *range)
+{
+	assert(range->size);
+
+	if (!range->bo)
+		return; /* TODO: PRT mapping */
+
+	int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
+				range->offset + bo->va, 0, AMDGPU_VA_OP_MAP);
+	if (r)
+		abort();
+}
+
+static void
+radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
+				 const struct radv_amdgpu_map_range *range)
+{
+	assert(range->size);
+
+	if (!range->bo)
+		return; /* TODO: PRT mapping */
+
+	int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
+				range->offset + bo->va, 0, AMDGPU_VA_OP_UNMAP);
+	if (r)
+		abort();
+}
+
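/*
 * Illustrative aside, not part of this commit: because unbound ranges are
 * skipped above, accesses to them hit unmapped VA. Mesa later replaced the
 * TODO with a PRT mapping roughly of this shape (a sketch, assuming a
 * libdrm that provides amdgpu_bo_va_op_raw and AMDGPU_VM_PAGE_PRT):
 *
 *	if (!range->bo) {
 *		r = amdgpu_bo_va_op_raw(ws->dev, NULL, 0, range->size,
 *					range->offset + bo->va,
 *					AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
 *		if (r)
 *			abort();
 *		return;
 *	}
 */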
+static void
+radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
+{
+	bo->bo_count = 0;
+	for (uint32_t i = 0; i < bo->range_count; ++i) {
+		bool found = false;
+		if (!bo->ranges[i].bo)
+			continue;
+
+		for (uint32_t j = 0; j < bo->bo_count; ++j) {
+			if (bo->bos[j] == bo->ranges[i].bo) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found) {
+			if (bo->bo_capacity == bo->bo_count) {
+				bo->bos = realloc(bo->bos,
+						  (bo->bo_capacity + 1) * sizeof(struct radv_amdgpu_winsys_bo *));
+				++bo->bo_capacity;
+			}
+			bo->bos[bo->bo_count++] = bo->ranges[i].bo;
+		}
+	}
+}
+
+static void
+radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
+				   uint64_t offset, uint64_t size,
+				   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
+{
+	struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
+	struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
+	int range_count_delta, new_idx;
+	int first = 0, last;
+	struct radv_amdgpu_map_range new_first, new_last;
+
+	assert(parent->is_virtual);
+	assert(!bo || !bo->is_virtual);
+
+	if (!size)
+		return;
+
+	/* We have at most 2 new ranges (1 by the bind, and another one by
+	 * splitting a range that contains the newly bound range). */
+	if (parent->range_capacity - parent->range_count < 2) {
+		parent->range_capacity += 2;
+		parent->ranges = realloc(parent->ranges,
+					 parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
+	}
+
+	/*
+	 * [first, last] is exactly the range of ranges that either overlap
+	 * the newly bound range, or are adjacent to it. These are the bind
+	 * ranges that may change.
+	 */
+	while (first + 1 < parent->range_count &&
+	       parent->ranges[first].offset + parent->ranges[first].size < offset)
+		++first;
+
+	last = first;
+	while (last + 1 < parent->range_count &&
+	       parent->ranges[last + 1].offset <= offset + size)
+		++last;
+
+	/* Whether the first or last range is going to be totally removed or
+	 * just resized/left alone. Note that in the case of first == last,
+	 * we will split this into a part before and after the new range. The
+	 * remove flag then says whether to skip creating the corresponding
+	 * split part. */
+	bool remove_first = parent->ranges[first].offset == offset;
+	bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
+	bool unmapped_first = false;
+
+	assert(parent->ranges[first].offset <= offset);
+	assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
+
+	/* Try to merge the new range with the first range. */
+	if (parent->ranges[first].bo == bo &&
+	    (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
+		size += offset - parent->ranges[first].offset;
+		offset = parent->ranges[first].offset;
+		bo_offset = parent->ranges[first].bo_offset;
+		remove_first = true;
+	}
+
+	/* Try to merge the new range with the last range. */
+	if (parent->ranges[last].bo == bo &&
+	    (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
+		size = parent->ranges[last].offset + parent->ranges[last].size - offset;
+		remove_last = true;
+	}
+
+	range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
+	new_idx = first + !remove_first;
+
+	/* Any range strictly between first and last is entirely covered by
+	 * the new range, so just unmap them. */
+	for (int i = first + 1; i < last; ++i)
+		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);
+
+	/* If the first/last range is not left alone, we unmap it and
+	 * optionally map it again after modifications. Note that this
+	 * implicitly performs the split when first == last. */
+	new_first = parent->ranges[first];
+	new_last = parent->ranges[last];
+
+	if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
+		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
+		unmapped_first = true;
+
+		if (!remove_first) {
+			new_first.size = offset - new_first.offset;
+			radv_amdgpu_winsys_virtual_map(parent, &new_first);
+		}
+	}
+
+	if (parent->ranges[last].offset < offset + size || remove_last) {
+		if (first != last || !unmapped_first)
+			radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);
+
+		if (!remove_last) {
+			new_last.size -= offset + size - new_last.offset;
+			new_last.offset = offset + size;
+			radv_amdgpu_winsys_virtual_map(parent, &new_last);
+		}
+	}
+
+	/* Move the ranges after last to account for the changed number of ranges. */
+	memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
+		sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));
+
+	if (!remove_first)
+		parent->ranges[first] = new_first;
+	if (!remove_last)
+		parent->ranges[new_idx + 1] = new_last;
+
+	/* Actually set up the new range. */
+	parent->ranges[new_idx].offset = offset;
+	parent->ranges[new_idx].size = size;
+	parent->ranges[new_idx].bo = bo;
+	parent->ranges[new_idx].bo_offset = bo_offset;
+
+	radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);
+
+	parent->range_count += range_count_delta;
+
+	radv_amdgpu_winsys_rebuild_bo_list(parent);
+}
+
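/*
 * Illustrative trace, not part of this commit: a virtual BO of size
 * 0x30000 starts as the single unbound range
 *
 *	{ offset=0x00000, size=0x30000, bo=NULL, bo_offset=0 }
 *
 * Binding mem at offset=0x10000, size=0x10000 hits first == last == 0 with
 * remove_first == remove_last == false, so range_count_delta =
 * 1 - 1 + 1 + 1 = 2 and the unbound range is split around the new one:
 *
 *	{ 0x00000, 0x10000, NULL, 0 }
 *	{ 0x10000, 0x10000, mem,  0 }
 *	{ 0x20000, 0x10000, NULL, 0 }
 *
 * Binding mem again at offset=0x20000 with bo_offset=0x10000 is contiguous
 * in both VA and BO offset with the middle range, so the merge path folds
 * them together (remove_first and remove_last both end up true,
 * range_count_delta = 1 - 2 + 0 + 0 = -1), leaving:
 *
 *	{ 0x00000, 0x10000, NULL, 0 }
 *	{ 0x10000, 0x20000, mem,  0 }
 */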
 static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
 {
 	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
-	if (bo->ws->debug_all_bos) {
-		pthread_mutex_lock(&bo->ws->global_bo_list_lock);
-		LIST_DEL(&bo->global_list_item);
-		bo->ws->num_buffers--;
-		pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
+
+	if (bo->is_virtual) {
+		for (uint32_t i = 0; i < bo->range_count; ++i) {
+			radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
+		}
+		free(bo->bos);
+		free(bo->ranges);
+	} else {
+		if (bo->ws->debug_all_bos) {
+			pthread_mutex_lock(&bo->ws->global_bo_list_lock);
+			LIST_DEL(&bo->global_list_item);
+			bo->ws->num_buffers--;
+			pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
+		}
+		amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
+		amdgpu_bo_free(bo->bo);
 	}
-	amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
 	amdgpu_va_range_free(bo->va_handle);
-	amdgpu_bo_free(bo->bo);
 	FREE(bo);
 }
@@ -81,6 +263,31 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
 		return NULL;
 	}
 
+	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
+				  size, alignment, 0, &va, &va_handle, 0);
+	if (r)
+		goto error_va_alloc;
+
+	bo->va = va;
+	bo->va_handle = va_handle;
+	bo->size = size;
+	bo->ws = ws;
+	bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
+
+	if (flags & RADEON_FLAG_VIRTUAL) {
+		bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
+		bo->range_count = 1;
+		bo->range_capacity = 1;
+
+		bo->ranges[0].offset = 0;
+		bo->ranges[0].size = size;
+		bo->ranges[0].bo = NULL;
+		bo->ranges[0].bo_offset = 0;
+
+		radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
+		return (struct radeon_winsys_bo *)bo;
+	}
+
 	request.alloc_size = size;
 	request.phys_alignment = alignment;
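A freshly created virtual BO therefore starts life as a single unbound range spanning the whole VA area, and every later bind keeps the range list sorted, gap-free, and non-overlapping. A minimal sketch of that invariant as a debug helper (hypothetical, not in the commit; assumes the surrounding headers):

	static void radv_check_virtual_ranges(const struct radv_amdgpu_winsys_bo *bo)
	{
		uint64_t end = 0;
		assert(bo->is_virtual);
		for (uint32_t i = 0; i < bo->range_count; ++i) {
			assert(bo->ranges[i].offset == end); /* no gaps, no overlap */
			assert(bo->ranges[i].size);          /* empty ranges are never kept */
			end += bo->ranges[i].size;
		}
		assert(end == bo->size); /* the ranges tile [0, size) exactly */
	}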
@@ -105,31 +312,22 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
 		goto error_bo_alloc;
 	}
 
-	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
-				  size, alignment, 0, &va, &va_handle, 0);
-	if (r)
-		goto error_va_alloc;
-
 	r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
 	if (r)
 		goto error_va_map;
 
 	bo->bo = buf_handle;
-	bo->va = va;
-	bo->va_handle = va_handle;
 	bo->initial_domain = initial_domain;
-	bo->size = size;
 	bo->is_shared = false;
-	bo->ws = ws;
 	radv_amdgpu_add_buffer_to_global_list(bo);
 	return (struct radeon_winsys_bo *)bo;
 error_va_map:
-	amdgpu_va_range_free(va_handle);
-
-error_va_alloc:
 	amdgpu_bo_free(buf_handle);
 
 error_bo_alloc:
+	amdgpu_va_range_free(va_handle);
+
+error_va_alloc:
 	FREE(bo);
 	return NULL;
 }
@@ -296,4 +494,5 @@ void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
 	ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
 	ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
 	ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
+	ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
 }

src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h

@@ -31,17 +31,40 @@
 
 #include "radv_amdgpu_winsys.h"
 
+struct radv_amdgpu_map_range {
+	uint64_t offset;
+	uint64_t size;
+	struct radv_amdgpu_winsys_bo *bo;
+	uint64_t bo_offset;
+};
+
 struct radv_amdgpu_winsys_bo {
-	amdgpu_bo_handle bo;
 	amdgpu_va_handle va_handle;
-
 	uint64_t va;
-	enum radeon_bo_domain initial_domain;
 	uint64_t size;
-	bool is_shared;
-
 	struct radv_amdgpu_winsys *ws;
-	struct list_head global_list_item;
+	bool is_virtual;
+
+	union {
+		/* physical bo */
+		struct {
+			amdgpu_bo_handle bo;
+			enum radeon_bo_domain initial_domain;
+			bool is_shared;
+			struct list_head global_list_item;
+		};
+		/* virtual bo */
+		struct {
+			struct radv_amdgpu_map_range *ranges;
+			uint32_t range_count;
+			uint32_t range_capacity;
+
+			struct radv_amdgpu_winsys_bo **bos;
+			uint32_t bo_count;
+			uint32_t bo_capacity;
+		};
+	};
 };
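Since the physical and virtual members now live in a union discriminated by is_virtual, each set of members is only valid for the matching kind of BO. A hedged illustration of the guard this implies (hypothetical helper, not in the commit):

	static inline amdgpu_bo_handle
	radv_amdgpu_winsys_bo_handle(struct radv_amdgpu_winsys_bo *bo)
	{
		/* bo->bo overlaps bo->ranges in memory, so the physical
		 * handle may only be read when is_virtual is false. */
		assert(!bo->is_virtual);
		return bo->bo;
	}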
 static inline

src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c

@@ -34,6 +34,11 @@
 #include "radv_amdgpu_bo.h"
 #include "sid.h"
 
+enum {
+	VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024
+};
+
 struct radv_amdgpu_cs {
 	struct radeon_winsys_cs base;
 	struct radv_amdgpu_winsys *ws;
@@ -56,6 +61,12 @@ struct radv_amdgpu_cs {
 	int buffer_hash_table[1024];
 	unsigned hw_ip;
+
+	unsigned num_virtual_buffers;
+	unsigned max_num_virtual_buffers;
+	struct radeon_winsys_bo **virtual_buffers;
+	uint8_t *virtual_buffer_priorities;
+	int *virtual_buffer_hash_table;
 };
 
 static inline struct radv_amdgpu_cs *
@@ -141,6 +152,9 @@ static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
 			cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
 
 	free(cs->old_ib_buffers);
+	free(cs->virtual_buffers);
+	free(cs->virtual_buffer_priorities);
+	free(cs->virtual_buffer_hash_table);
 	free(cs->handles);
 	free(cs->priorities);
 	free(cs);
@@ -315,7 +329,13 @@ static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
 		cs->buffer_hash_table[hash] = -1;
 	}
 
+	for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
+		unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
+		cs->virtual_buffer_hash_table[hash] = -1;
+	}
+
 	cs->num_buffers = 0;
+	cs->num_virtual_buffers = 0;
 
 	if (cs->ws->use_ib_bos) {
 		cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);
@@ -380,6 +400,49 @@ static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
 	++cs->num_buffers;
 }
 
+static void radv_amdgpu_cs_add_virtual_buffer(struct radeon_winsys_cs *_cs,
+					      struct radeon_winsys_bo *bo,
+					      uint8_t priority)
+{
+	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
+	unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
+
+	if (!cs->virtual_buffer_hash_table) {
+		cs->virtual_buffer_hash_table = malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
+		for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
+			cs->virtual_buffer_hash_table[i] = -1;
+	}
+
+	if (cs->virtual_buffer_hash_table[hash] >= 0) {
+		int idx = cs->virtual_buffer_hash_table[hash];
+		if (cs->virtual_buffers[idx] == bo) {
+			cs->virtual_buffer_priorities[idx] = MAX2(cs->virtual_buffer_priorities[idx], priority);
+			return;
+		}
+
+		for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
+			if (cs->virtual_buffers[i] == bo) {
+				cs->virtual_buffer_priorities[i] = MAX2(cs->virtual_buffer_priorities[i], priority);
+				cs->virtual_buffer_hash_table[hash] = i;
+				return;
+			}
+		}
+	}
+
+	if (cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
+		cs->max_num_virtual_buffers = MAX2(2, cs->max_num_virtual_buffers * 2);
+		cs->virtual_buffers = realloc(cs->virtual_buffers,
+					      sizeof(struct radeon_winsys_bo *) * cs->max_num_virtual_buffers);
+		cs->virtual_buffer_priorities = realloc(cs->virtual_buffer_priorities,
+							sizeof(uint8_t) * cs->max_num_virtual_buffers);
+	}
+
+	cs->virtual_buffers[cs->num_virtual_buffers] = bo;
+	cs->virtual_buffer_priorities[cs->num_virtual_buffers] = priority;
+
+	cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
+	++cs->num_virtual_buffers;
+}
+
 static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
 				      struct radeon_winsys_bo *_bo,
 				      uint8_t priority)
@@ -387,6 +450,11 @@
 {
 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
 	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
 
+	if (bo->is_virtual) {
+		radv_amdgpu_cs_add_virtual_buffer(_cs, _bo, priority);
+		return;
+	}
+
 	radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
 }
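The virtual-buffer hash table is a 1024-slot direct-mapped cache: each slot remembers the index of the most recent buffer that hashed there, and a slot mismatch falls back to a linear scan that re-points the slot. A sketch of the implied lookup contract (hypothetical helper, slightly more defensive than the code above):

	static int radv_amdgpu_cs_find_virtual_buffer(struct radv_amdgpu_cs *cs,
						      struct radeon_winsys_bo *bo)
	{
		/* BOs are heap allocations; dropping the low 6 pointer bits
		 * spreads neighboring allocations across slots before masking. */
		unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
		int idx;

		if (!cs->virtual_buffer_hash_table)
			return -1;

		idx = cs->virtual_buffer_hash_table[hash];
		if (idx >= 0 && cs->virtual_buffers[idx] == bo)
			return idx; /* fast path: the slot still points at this BO */

		for (unsigned i = 0; i < cs->num_virtual_buffers; ++i)
			if (cs->virtual_buffers[i] == bo)
				return (int)i; /* slow path after a slot collision */
		return -1;
	}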
@@ -401,6 +469,11 @@ static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
 					  child->priorities[i]);
 	}
 
+	for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
+		radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i],
+					  child->virtual_buffer_priorities[i]);
+	}
+
 	if (parent->ws->use_ib_bos) {
 		if (parent->base.cdw + 4 > parent->base.max_dw)
 			radv_amdgpu_cs_grow(&parent->base, 4);
@@ -449,7 +522,8 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 					  bo_list);
 		free(handles);
 		pthread_mutex_unlock(&ws->global_bo_list_lock);
-	} else if (count == 1 && !extra_bo && !extra_cs) {
+	} else if (count == 1 && !extra_bo && !extra_cs &&
+		   !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
 		struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
 		r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
 					  cs->priorities, bo_list);
@@ -459,6 +533,8 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 		for (unsigned i = 0; i < count; ++i) {
 			struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
 			total_buffer_count += cs->num_buffers;
+			for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
+				total_buffer_count += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
 		}
 
 		if (extra_cs) {
if (extra_cs) { if (extra_cs) {
@ -512,6 +588,26 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
++unique_bo_count; ++unique_bo_count;
} }
} }
for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
for(unsigned k = 0; k < virtual_bo->bo_count; ++k) {
struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
bool found = false;
for (unsigned m = 0; m < unique_bo_count; ++m) {
if (handles[m] == bo->bo) {
found = true;
priorities[m] = MAX2(priorities[m],
cs->virtual_buffer_priorities[j]);
break;
}
}
if (!found) {
handles[unique_bo_count] = bo->bo;
priorities[unique_bo_count] = cs->virtual_buffer_priorities[j];
++unique_bo_count;
}
}
}
} }
r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles, r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
priorities, bo_list); priorities, bo_list);