panfrost: Introduce invisible pool

Whereas the main batch->pool is CPU read/write, the new
batch->invisible_pool is not. This enables GPU-internal structures that
the CPU must allocate from a pool dynamically but does not read,
corresponding to the BO_INVISIBLE create flag.

The use case is speeding up varying allocation by skipping the
CPU-side mmap/munmap.

We simultaneously halve the pool's minimum allocation to avoid negatively
affecting memory usage.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6373>
This commit is contained in:
Alyssa Rosenzweig 2020-08-17 10:31:02 -04:00 committed by Tomeu Vizoso
parent 40c0d7a13d
commit 17c617cdb7
4 changed files with 24 additions and 4 deletions

View file

@@ -1492,7 +1492,7 @@ panfrost_emit_varyings(struct panfrost_batch *batch,
unsigned stride, unsigned count)
{
unsigned size = stride * count;
mali_ptr ptr = panfrost_pool_alloc(&batch->pool, size).gpu;
mali_ptr ptr = panfrost_pool_alloc(&batch->invisible_pool, size).gpu;
pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
cfg.stride = stride;

View file

@@ -100,6 +100,7 @@ panfrost_create_batch(struct panfrost_context *ctx,
const struct pipe_framebuffer_state *key)
{
struct panfrost_batch *batch = rzalloc(ctx, struct panfrost_batch);
struct panfrost_device *dev = pan_device(ctx->base.screen);
batch->ctx = ctx;
@@ -112,7 +113,15 @@ panfrost_create_batch(struct panfrost_context *ctx,
batch->out_sync = panfrost_create_batch_fence(batch);
util_copy_framebuffer_state(&batch->key, key);
batch->pool = panfrost_create_pool(batch, pan_device(ctx->base.screen), 0, true);
/* Preallocate the main pool, since every batch has at least one job
* structure so it will be used */
batch->pool = panfrost_create_pool(batch, dev, 0, true);
/* Don't preallocate the invisible pool, since not every batch will use
* the pre-allocation, particularly if the varyings are larger than the
* preallocation and a reallocation is needed after anyway. */
batch->invisible_pool =
panfrost_create_pool(batch, dev, PAN_BO_INVISIBLE, false);
panfrost_batch_add_fbo_bos(batch);
@@ -170,6 +179,9 @@ panfrost_free_batch(struct panfrost_batch *batch)
hash_table_foreach(batch->pool.bos, entry)
panfrost_bo_unreference((struct panfrost_bo *)entry->key);
hash_table_foreach(batch->invisible_pool.bos, entry)
panfrost_bo_unreference((struct panfrost_bo *)entry->key);
util_dynarray_foreach(&batch->dependencies,
struct panfrost_batch_fence *, dep) {
panfrost_batch_fence_unreference(*dep);
@@ -985,7 +997,7 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
submit.jc = first_job_desc;
submit.requirements = reqs;
bo_handles = calloc(batch->pool.bos->entries + batch->bos->entries, sizeof(*bo_handles));
bo_handles = calloc(batch->pool.bos->entries + batch->invisible_pool.bos->entries + batch->bos->entries, sizeof(*bo_handles));
assert(bo_handles);
hash_table_foreach(batch->bos, entry)
@@ -994,6 +1006,9 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
hash_table_foreach(batch->pool.bos, entry)
panfrost_batch_record_bo(entry, bo_handles, submit.bo_handle_count++);
hash_table_foreach(batch->invisible_pool.bos, entry)
panfrost_batch_record_bo(entry, bo_handles, submit.bo_handle_count++);
submit.bo_handles = (u64) (uintptr_t) bo_handles;
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
free(bo_handles);

View file

@@ -95,6 +95,11 @@ struct panfrost_batch {
/* Pool owned by this batch (released when the batch is released) used for temporary descriptors */
struct pan_pool pool;
/* Pool also owned by this batch that is not CPU mapped (created as
* INVISIBLE) used for private GPU-internal structures, particularly
* varyings */
struct pan_pool invisible_pool;
/* Job scoreboarding state */
struct pan_scoreboard scoreboard;

View file

@@ -45,7 +45,7 @@
/* Transient slab size. This is a balance between fragmentation against cache
* locality and ease of bookkeeping */
#define TRANSIENT_SLAB_PAGES (32) /* 128kb */
#define TRANSIENT_SLAB_PAGES (16) /* 64kb */
#define TRANSIENT_SLAB_SIZE (4096 * TRANSIENT_SLAB_PAGES)
/* Maximum number of transient slabs so we don't need dynamic arrays. Most