winsys/amdgpu: optimize slab allocation for 2 MB amdgpu page tables

- the slab buffer size increased from 128 KB to 2 MB (PTE fragment size)
- the max suballocated buffer size increased from 64 KB to 256 KB,
  which increases memory usage because more memory is wasted
- the number of suballocators increased from 1 to 3 and they are layered
  on top of each other to minimize unused space in slabs

The final increase in memory usage is:
  DeusEx:MD:  1.8%
  DOTA 2:     1.75%
  DiRT Rally: 0.2%

The kernel driver will also receive fewer buffers.
This commit is contained in:
Marek Olšák 2018-11-21 02:15:11 -05:00
parent cf6835485c
commit 5f9ccf827e
3 changed files with 10 additions and 2 deletions

View file

@@ -613,6 +613,14 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
if (entry_size <= max_entry_size) {
/* The slab size is twice the size of the largest possible entry. */
slab_size = max_entry_size * 2;
/* The largest slab should have the same size as the PTE fragment
* size to get faster address translation.
*/
if (i == NUM_SLAB_ALLOCATORS - 1 &&
slab_size < ws->info.pte_fragment_size)
slab_size = ws->info.pte_fragment_size;
break;
}
}
assert(slab_size != 0);

View file

@@ -311,7 +311,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
amdgpu_bo_destroy, amdgpu_bo_can_reclaim);
unsigned min_slab_order = 9; /* 512 bytes */
unsigned max_slab_order = 16; /* 64 KB - higher numbers increase memory usage */
unsigned max_slab_order = 18; /* 256 KB - higher numbers increase memory usage */
unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) /
NUM_SLAB_ALLOCATORS;

View file

@@ -38,7 +38,7 @@
struct amdgpu_cs;
#define NUM_SLAB_ALLOCATORS 1
#define NUM_SLAB_ALLOCATORS 3
struct amdgpu_winsys {
struct radeon_winsys base;