mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 20:10:14 +01:00
winsys/radeon: enable buffer allocation from slabs
Enable this only for chips with GPUVM, because the older (non-VM) driver paths do not take the required sub-allocation offset into account.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
parent
a1e391e39d
commit
fb827c055c
4 changed files with 209 additions and 2 deletions
|
|
@ -42,6 +42,13 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
|
|
||||||
|
static struct pb_buffer *
|
||||||
|
radeon_winsys_bo_create(struct radeon_winsys *rws,
|
||||||
|
uint64_t size,
|
||||||
|
unsigned alignment,
|
||||||
|
enum radeon_bo_domain domain,
|
||||||
|
enum radeon_bo_flag flags);
|
||||||
|
|
||||||
static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
|
static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
|
||||||
{
|
{
|
||||||
return (struct radeon_bo *)bo;
|
return (struct radeon_bo *)bo;
|
||||||
|
|
@ -700,6 +707,120 @@ bool radeon_bo_can_reclaim(struct pb_buffer *_buf)
|
||||||
return radeon_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
|
return radeon_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
|
||||||
|
{
|
||||||
|
struct radeon_bo *bo = NULL; /* fix container_of */
|
||||||
|
bo = container_of(entry, bo, u.slab.entry);
|
||||||
|
|
||||||
|
return radeon_bo_can_reclaim(&bo->base);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void radeon_bo_slab_destroy(struct pb_buffer *_buf)
|
||||||
|
{
|
||||||
|
struct radeon_bo *bo = radeon_bo(_buf);
|
||||||
|
|
||||||
|
assert(!bo->handle);
|
||||||
|
|
||||||
|
pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Minimal vtbl for slab-suballocated buffers: only destroy is ever
 * invoked through pb_buffer for these. */
static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = {
   radeon_bo_slab_destroy
   /* other functions are never called */
};
|
||||||
|
|
||||||
|
/* pb_slabs callback: allocate one 64 KiB backing buffer and carve it into
 * num_entries sub-buffers of entry_size bytes each.
 *
 * \param priv         the radeon_drm_winsys (opaque to pb_slabs)
 * \param heap         encoded domain/flags; the bit layout must match the
 *                     encoder in radeon_winsys_bo_create:
 *                     bit 0 = GTT_WC, bit 1 = CPU_ACCESS, bits 2+ = domain
 * \param entry_size   size of each sub-allocation (power of two)
 * \param group_index  pb_slabs bookkeeping index stored in every entry
 * \return the new slab, or NULL on allocation failure
 */
struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
                                     unsigned entry_size,
                                     unsigned group_index)
{
   struct radeon_drm_winsys *ws = priv;
   struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
   enum radeon_bo_domain domains;
   enum radeon_bo_flag flags = 0;
   unsigned base_hash;

   if (!slab)
      return NULL;

   /* Decode the flag bits (see the matching encoder in
    * radeon_winsys_bo_create). */
   if (heap & 1)
      flags |= RADEON_FLAG_GTT_WC;
   if (heap & 2)
      flags |= RADEON_FLAG_CPU_ACCESS;

   /* Decode the domain from the remaining bits. */
   switch (heap >> 2) {
   case 0:
      domains = RADEON_DOMAIN_VRAM;
      break;
   default:
   case 1:
      domains = RADEON_DOMAIN_VRAM_GTT;
      break;
   case 2:
      domains = RADEON_DOMAIN_GTT;
      break;
   }

   /* The backing buffer: 64 KiB, 64 KiB-aligned. Allocated through the
    * normal winsys path so it can come from the cache. */
   slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
                                                    64 * 1024, 64 * 1024,
                                                    domains, flags));
   if (!slab->buffer)
      goto fail;

   /* The backing buffer must be a real (non-suballocated) BO. */
   assert(slab->buffer->handle);

   slab->base.num_entries = slab->buffer->base.size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto fail_buffer;

   LIST_INITHEAD(&slab->base.free);

   /* Reserve a contiguous range of hash values for all entries at once. */
   base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);

   /* Initialize every entry and put it on the free list. */
   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct radeon_bo *bo = &slab->entries[i];

      bo->base.alignment = entry_size;
      bo->base.usage = slab->buffer->base.usage;
      bo->base.size = entry_size;
      bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
      bo->rws = ws;
      /* GPUVM address of this sub-allocation within the backing buffer. */
      bo->va = slab->buffer->va + i * entry_size;
      bo->initial_domain = domains;
      bo->hash = base_hash + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.entry.group_index = group_index;
      bo->u.slab.real = slab->buffer;

      LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free);
   }

   return &slab->base;

fail_buffer:
   radeon_bo_reference(&slab->buffer, NULL);
fail:
   FREE(slab);
   return NULL;
}
|
||||||
|
|
||||||
|
void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
|
||||||
|
{
|
||||||
|
struct radeon_slab *slab = (struct radeon_slab *)pslab;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < slab->base.num_entries; ++i) {
|
||||||
|
struct radeon_bo *bo = &slab->entries[i];
|
||||||
|
for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
|
||||||
|
radeon_bo_reference(&bo->u.slab.fences[j], NULL);
|
||||||
|
FREE(bo->u.slab.fences);
|
||||||
|
}
|
||||||
|
|
||||||
|
FREE(slab->entries);
|
||||||
|
radeon_bo_reference(&slab->buffer, NULL);
|
||||||
|
FREE(slab);
|
||||||
|
}
|
||||||
|
|
||||||
static unsigned eg_tile_split(unsigned tile_split)
|
static unsigned eg_tile_split(unsigned tile_split)
|
||||||
{
|
{
|
||||||
switch (tile_split) {
|
switch (tile_split) {
|
||||||
|
|
@ -823,6 +944,54 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
|
||||||
if (size > UINT_MAX)
|
if (size > UINT_MAX)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
/* Sub-allocate small buffers from slabs. */
|
||||||
|
if (!(flags & RADEON_FLAG_HANDLE) &&
|
||||||
|
size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
|
||||||
|
ws->info.has_virtual_memory &&
|
||||||
|
alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
|
||||||
|
struct pb_slab_entry *entry;
|
||||||
|
unsigned heap = 0;
|
||||||
|
|
||||||
|
if (flags & RADEON_FLAG_GTT_WC)
|
||||||
|
heap |= 1;
|
||||||
|
if (flags & RADEON_FLAG_CPU_ACCESS)
|
||||||
|
heap |= 2;
|
||||||
|
if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
|
||||||
|
goto no_slab;
|
||||||
|
|
||||||
|
switch (domain) {
|
||||||
|
case RADEON_DOMAIN_VRAM:
|
||||||
|
heap |= 0 * 4;
|
||||||
|
break;
|
||||||
|
case RADEON_DOMAIN_VRAM_GTT:
|
||||||
|
heap |= 1 * 4;
|
||||||
|
break;
|
||||||
|
case RADEON_DOMAIN_GTT:
|
||||||
|
heap |= 2 * 4;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
goto no_slab;
|
||||||
|
}
|
||||||
|
|
||||||
|
entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
|
||||||
|
if (!entry) {
|
||||||
|
/* Clear the cache and try again. */
|
||||||
|
pb_cache_release_all_buffers(&ws->bo_cache);
|
||||||
|
|
||||||
|
entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
|
||||||
|
}
|
||||||
|
if (!entry)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
bo = NULL;
|
||||||
|
bo = container_of(entry, bo, u.slab.entry);
|
||||||
|
|
||||||
|
pipe_reference_init(&bo->base.reference, 1);
|
||||||
|
|
||||||
|
return &bo->base;
|
||||||
|
}
|
||||||
|
no_slab:
|
||||||
|
|
||||||
/* This flag is irrelevant for the cache. */
|
/* This flag is irrelevant for the cache. */
|
||||||
flags &= ~RADEON_FLAG_HANDLE;
|
flags &= ~RADEON_FLAG_HANDLE;
|
||||||
|
|
||||||
|
|
@ -862,6 +1031,7 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
|
||||||
pb_cache_bucket);
|
pb_cache_bucket);
|
||||||
if (!bo) {
|
if (!bo) {
|
||||||
/* Clear the cache and try again. */
|
/* Clear the cache and try again. */
|
||||||
|
pb_slabs_reclaim(&ws->bo_slabs);
|
||||||
pb_cache_release_all_buffers(&ws->bo_cache);
|
pb_cache_release_all_buffers(&ws->bo_cache);
|
||||||
bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
|
bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
|
||||||
pb_cache_bucket);
|
pb_cache_bucket);
|
||||||
|
|
|
||||||
|
|
@ -74,10 +74,22 @@ struct radeon_bo {
|
||||||
int num_active_ioctls;
|
int num_active_ioctls;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* A slab of equally-sized sub-allocations carved out of one real BO. */
struct radeon_slab {
   struct pb_slab base;        /* pb_slabs bookkeeping; must be first */
   struct radeon_bo *buffer;   /* the real backing buffer */
   struct radeon_bo *entries;  /* array of base.num_entries sub-buffers */
};
|
||||||
|
|
||||||
void radeon_bo_destroy(struct pb_buffer *_buf);
|
void radeon_bo_destroy(struct pb_buffer *_buf);
|
||||||
bool radeon_bo_can_reclaim(struct pb_buffer *_buf);
|
bool radeon_bo_can_reclaim(struct pb_buffer *_buf);
|
||||||
void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws);
|
void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws);
|
||||||
|
|
||||||
|
/* pb_slabs callbacks for slab-based sub-allocation (radeon_drm_bo.c). */
bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry);
struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
                                     unsigned entry_size,
                                     unsigned group_index);
void radeon_bo_slab_free(void *priv, struct pb_slab *slab);
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
void radeon_bo_reference(struct radeon_bo **dst, struct radeon_bo *src)
|
void radeon_bo_reference(struct radeon_bo **dst, struct radeon_bo *src)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -545,6 +545,8 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
|
||||||
pipe_mutex_destroy(ws->hyperz_owner_mutex);
|
pipe_mutex_destroy(ws->hyperz_owner_mutex);
|
||||||
pipe_mutex_destroy(ws->cmask_owner_mutex);
|
pipe_mutex_destroy(ws->cmask_owner_mutex);
|
||||||
|
|
||||||
|
if (ws->info.has_virtual_memory)
|
||||||
|
pb_slabs_deinit(&ws->bo_slabs);
|
||||||
pb_cache_deinit(&ws->bo_cache);
|
pb_cache_deinit(&ws->bo_cache);
|
||||||
|
|
||||||
if (ws->gen >= DRV_R600) {
|
if (ws->gen >= DRV_R600) {
|
||||||
|
|
@ -759,10 +761,25 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
|
||||||
radeon_bo_destroy,
|
radeon_bo_destroy,
|
||||||
radeon_bo_can_reclaim);
|
radeon_bo_can_reclaim);
|
||||||
|
|
||||||
|
if (ws->info.has_virtual_memory) {
|
||||||
|
/* There is no fundamental obstacle to using slab buffer allocation
|
||||||
|
* without GPUVM, but enabling it requires making sure that the drivers
|
||||||
|
* honor the address offset.
|
||||||
|
*/
|
||||||
|
if (!pb_slabs_init(&ws->bo_slabs,
|
||||||
|
RADEON_SLAB_MIN_SIZE_LOG2, RADEON_SLAB_MAX_SIZE_LOG2,
|
||||||
|
12,
|
||||||
|
ws,
|
||||||
|
radeon_bo_can_reclaim_slab,
|
||||||
|
radeon_bo_slab_alloc,
|
||||||
|
radeon_bo_slab_free))
|
||||||
|
goto fail_cache;
|
||||||
|
}
|
||||||
|
|
||||||
if (ws->gen >= DRV_R600) {
|
if (ws->gen >= DRV_R600) {
|
||||||
ws->surf_man = radeon_surface_manager_new(ws->fd);
|
ws->surf_man = radeon_surface_manager_new(ws->fd);
|
||||||
if (!ws->surf_man)
|
if (!ws->surf_man)
|
||||||
goto fail;
|
goto fail_slab;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* init reference */
|
/* init reference */
|
||||||
|
|
@ -819,7 +836,10 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
|
||||||
|
|
||||||
return &ws->base;
|
return &ws->base;
|
||||||
|
|
||||||
fail:
|
fail_slab:
|
||||||
|
if (ws->info.has_virtual_memory)
|
||||||
|
pb_slabs_deinit(&ws->bo_slabs);
|
||||||
|
fail_cache:
|
||||||
pb_cache_deinit(&ws->bo_cache);
|
pb_cache_deinit(&ws->bo_cache);
|
||||||
fail1:
|
fail1:
|
||||||
pipe_mutex_unlock(fd_tab_mutex);
|
pipe_mutex_unlock(fd_tab_mutex);
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@
|
||||||
|
|
||||||
#include "gallium/drivers/radeon/radeon_winsys.h"
|
#include "gallium/drivers/radeon/radeon_winsys.h"
|
||||||
#include "pipebuffer/pb_cache.h"
|
#include "pipebuffer/pb_cache.h"
|
||||||
|
#include "pipebuffer/pb_slab.h"
|
||||||
#include "util/u_queue.h"
|
#include "util/u_queue.h"
|
||||||
#include "util/list.h"
|
#include "util/list.h"
|
||||||
#include <radeon_drm.h>
|
#include <radeon_drm.h>
|
||||||
|
|
@ -62,10 +63,14 @@ enum radeon_generation {
|
||||||
DRV_SI
|
DRV_SI
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define RADEON_SLAB_MIN_SIZE_LOG2 9
|
||||||
|
#define RADEON_SLAB_MAX_SIZE_LOG2 14
|
||||||
|
|
||||||
struct radeon_drm_winsys {
|
struct radeon_drm_winsys {
|
||||||
struct radeon_winsys base;
|
struct radeon_winsys base;
|
||||||
struct pipe_reference reference;
|
struct pipe_reference reference;
|
||||||
struct pb_cache bo_cache;
|
struct pb_cache bo_cache;
|
||||||
|
struct pb_slabs bo_slabs;
|
||||||
|
|
||||||
int fd; /* DRM file descriptor */
|
int fd; /* DRM file descriptor */
|
||||||
int num_cs; /* The number of command streams created. */
|
int num_cs; /* The number of command streams created. */
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue