amdgpu_bo: make cache_entry an extensible array

Improves performance in SPECviewperf13 snx.
For example, test10 improves from 270 fps to 280 fps.

"pahole radeonsi_dri.so -C amdgpu_winsys_bo" after:

struct amdgpu_winsys_bo {
	struct pb_buffer           base;                 /*     0    32 */
	union {
		struct {
			amdgpu_va_handle va_handle;      /*    32     8 */
			uint32_t   kms_handle;           /*    40     4 */
			int        map_count;            /*    44     4 */
		} real;                                  /*    32    16 */
		[...]
	} u;                                             /*    32    40 */
	/* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
	[...]
	struct pb_cache_entry      cache_entry[];        /*   144     0 */

	/* size: 144, cachelines: 3, members: 17 */
};

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7532>
This commit is contained in:
Pierre-Eric Pelloux-Prayer 2020-11-10 13:47:35 +01:00 committed by Marge Bot
parent 111a1b2e1c
commit 2be8cebd0b
2 changed files with 15 additions and 11 deletions

View file

@ -230,8 +230,8 @@ static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
assert(bo->bo); /* slab buffers have a separate vtbl */
if (bo->u.real.use_reusable_pool)
pb_cache_add_buffer(&bo->u.real.cache_entry);
if (bo->use_reusable_pool)
pb_cache_add_buffer(bo->cache_entry);
else
amdgpu_bo_destroy(_buf);
}
@ -476,6 +476,7 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
struct amdgpu_winsys_bo *bo;
amdgpu_va_handle va_handle = NULL;
int r;
bool init_pb_cache;
/* VRAM or GTT must be specified, but not both at the same time. */
assert(util_bitcount(initial_domain & (RADEON_DOMAIN_VRAM_GTT |
@ -484,13 +485,17 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
alignment = amdgpu_get_optimal_alignment(ws, size, alignment);
bo = CALLOC_STRUCT(amdgpu_winsys_bo);
init_pb_cache = heap >= 0 && (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING);
bo = CALLOC(1, sizeof(struct amdgpu_winsys_bo) +
init_pb_cache * sizeof(struct pb_cache_entry));
if (!bo) {
return NULL;
}
if (heap >= 0) {
pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
if (init_pb_cache) {
bo->use_reusable_pool = true;
pb_cache_init_entry(&ws->bo_cache, bo->cache_entry, &bo->base,
heap);
}
request.alloc_size = size;
@ -1378,7 +1383,6 @@ no_slab:
return NULL;
}
bo->u.real.use_reusable_pool = use_reusable_pool;
return &bo->base;
}
@ -1533,7 +1537,7 @@ static bool amdgpu_bo_get_handle(struct radeon_winsys *rws,
if (!bo->bo)
return false;
bo->u.real.use_reusable_pool = false;
bo->use_reusable_pool = false;
switch (whandle->type) {
case WINSYS_HANDLE_TYPE_SHARED:

View file

@ -59,15 +59,12 @@ struct amdgpu_winsys_bo {
struct pb_buffer base;
union {
struct {
struct pb_cache_entry cache_entry;
amdgpu_va_handle va_handle;
int map_count;
bool use_reusable_pool;
#if DEBUG
struct list_head global_list_item;
#endif
uint32_t kms_handle;
int map_count;
} real;
struct {
struct pb_slab_entry entry;
@ -91,6 +88,7 @@ struct amdgpu_winsys_bo {
amdgpu_bo_handle bo; /* NULL for slab entries and sparse buffers */
bool is_user_ptr;
bool use_reusable_pool;
uint32_t unique_id;
uint64_t va;
simple_mtx_t lock;
@ -111,6 +109,8 @@ struct amdgpu_winsys_bo {
unsigned num_fences;
unsigned max_fences;
struct pipe_fence_handle **fences;
struct pb_cache_entry cache_entry[];
};
struct amdgpu_slab {