freedreno: Suballocate our long-lived ring objects.

On drawoverhead -test 9 (8 texture changes), this saves us 172 KB of
memory.  That's only ~1% of the GEM memory while the test is running, but
more importantly it saves us 29% of the GEM BO allocations.

Non-TC drawoverhead -test 9 (8 texture changes): throughput +0.449019% +/-
0.336296% (n=100), but this should get better as suballocation density
improves.

Note that this means that all fd_ringbuffer_new_object calls can now
return data aligned to 64 bytes, instead of 4k.  We may find that we need
to increase it if some of our objects (tex consts, sampler consts, etc.)
require more alignment than that.  But this may also help non-drawoverhead
perf if any of our RB objects have a cache in front of them (indirect
consts?), since we no longer have most of our data landing in the same
cache set.
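
A minimal sketch of the bump-pointer scheme described above (illustrative
names only; suballoc_state and new_bo are not the driver's real identifiers,
the actual implementation is msm_ringbuffer_sp_new_object() below):

    #include <stdint.h>

    #define SUBALLOC_BO_SIZE (32 * 1024)

    struct suballoc_state {
       void *bo;          /* current backing buffer (stand-in for an fd_bo) */
       uint32_t bo_size;  /* size of that buffer in bytes */
       uint32_t offset;   /* next free byte within it */
    };

    /* Returns the offset of a size-byte chunk within state->bo, switching to
     * a fresh backing buffer (via the caller-supplied new_bo) when the
     * current one can't fit the request.  The real code also drops its
     * reference to the exhausted BO at that point. */
    static uint32_t
    suballoc(struct suballoc_state *state, uint32_t size,
             void *(*new_bo)(uint32_t bo_size))
    {
       /* 64 bytes covers the largest known hardware requirement (a6xx
        * TEX_CONST at 16 dwords). */
       uint32_t offset = (state->offset + 63) & ~(uint32_t)63;

       if (!state->bo || offset + size > state->bo_size) {
          state->bo_size = size > SUBALLOC_BO_SIZE ? size : SUBALLOC_BO_SIZE;
          state->bo = new_bo(state->bo_size);
          offset = 0;
       }

       state->offset = offset + size;
       return offset;
    }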

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11697>
Emma Anholt 2021-07-02 10:51:16 -07:00 committed by Marge Bot
parent eefd93c176
commit 737d4caa83
3 changed files with 26 additions and 3 deletions

@@ -171,6 +171,10 @@ static void
 msm_pipe_destroy(struct fd_pipe *pipe)
 {
    struct msm_pipe *msm_pipe = to_msm_pipe(pipe);
+
+   if (msm_pipe->suballoc_bo)
+      fd_bo_del_locked(msm_pipe->suballoc_bo);
+
    close_submitqueue(pipe, msm_pipe->queue_id);
    msm_pipe_sp_ringpool_fini(pipe);
    free(msm_pipe);

@@ -56,6 +56,10 @@ struct msm_pipe {
    uint32_t queue_id;
    struct slab_parent_pool ring_pool;
 
+   /* BO for suballocating long-lived objects on the pipe. */
+   struct fd_bo *suballoc_bo;
+   uint32_t suballoc_offset;
+
    /**
     * The last fence seqno that was flushed to kernel (doesn't mean that it
     * is complete, just that the kernel knows about it)

@@ -42,6 +42,8 @@
 
 #define INIT_SIZE 0x1000
 
+#define SUBALLOC_SIZE (32 * 1024)
+
 /* In the pipe->flush() path, we don't have a util_queue_fence we can wait on,
  * instead use a condition-variable.  Note that pipe->flush() is not expected
  * to be a common/hot path.
@@ -180,7 +182,7 @@ msm_submit_suballoc_ring_bo(struct fd_submit *submit,
 
    if (!suballoc_bo) {
       // TODO possibly larger size for streaming bo?
-      msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, 0x8000);
+      msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE);
       msm_ring->offset = 0;
    } else {
       msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
@@ -811,12 +813,25 @@ msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
 struct fd_ringbuffer *
 msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
 {
+   struct msm_pipe *msm_pipe = to_msm_pipe(pipe);
    struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring));
 
+   /* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords */
+   msm_ring->offset = align(msm_pipe->suballoc_offset, 64);
+   if (!msm_pipe->suballoc_bo ||
+       msm_ring->offset + size > fd_bo_size(msm_pipe->suballoc_bo)) {
+      if (msm_pipe->suballoc_bo)
+         fd_bo_del(msm_pipe->suballoc_bo);
+      msm_pipe->suballoc_bo =
+         fd_bo_new_ring(pipe->dev, MAX2(SUBALLOC_SIZE, align(size, 4096)));
+      msm_ring->offset = 0;
+   }
+
    msm_ring->u.pipe = pipe;
-   msm_ring->offset = 0;
-   msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);
+   msm_ring->ring_bo = fd_bo_ref(msm_pipe->suballoc_bo);
    msm_ring->base.refcnt = 1;
 
+   msm_pipe->suballoc_offset = msm_ring->offset + size;
+
    return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
 }
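
For a caller's-eye view (a hypothetical helper, not code from the tree):
each long-lived state object built through fd_ringbuffer_new_object() now
becomes a 64-byte-aligned slice of the pipe's shared suballoc_bo instead of
its own GEM allocation:

    #include "freedreno_ringbuffer.h"

    /* Hypothetical helper: builds n small, long-lived state objects.  With
     * the change above they all share the pipe's 32 KB suballoc_bo rather
     * than each costing a separate GEM BO. */
    static void
    build_tex_state_objects(struct fd_pipe *pipe,
                            struct fd_ringbuffer *objs[], int n)
    {
       for (int i = 0; i < n; i++) {
          /* 16 dwords matches the a6xx TEX_CONST size mentioned in the
           * commit message. */
          objs[i] = fd_ringbuffer_new_object(pipe, 16 * 4);
          /* ... emit the texture state dwords into objs[i] here ... */
       }
    }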