diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c b/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c index a00f738669c..cb6d3dcfdb7 100644 --- a/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c +++ b/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c @@ -352,11 +352,25 @@ panvk_graphics_pipeline_create(struct panvk_device *dev, vk_dynamic_graphics_state_fill(&gfx_pipeline->state.dynamic, &state); gfx_pipeline->state.rp = *state.rp; - panvk_pool_init(&gfx_pipeline->base.bin_pool, dev, NULL, - PAN_KMOD_BO_FLAG_EXECUTABLE, 4096, - "Pipeline shader binaries", false); - panvk_pool_init(&gfx_pipeline->base.desc_pool, dev, NULL, 0, 4096, - "Pipeline static state", false); + struct panvk_pool_properties bin_pool_props = { + .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE, + .slab_size = 4096, + .label = "Pipeline shader binaries", + .prealloc = false, + .owns_bos = true, + .needs_locking = false, + }; + panvk_pool_init(&gfx_pipeline->base.bin_pool, dev, NULL, &bin_pool_props); + + struct panvk_pool_properties desc_pool_props = { + .create_flags = 0, + .slab_size = 4096, + .label = "Pipeline static state", + .prealloc = false, + .owns_bos = true, + .needs_locking = false, + }; + panvk_pool_init(&gfx_pipeline->base.desc_pool, dev, NULL, &desc_pool_props); /* Make sure the stage info is correct even if no stage info is provided for * this stage in pStages. 
@@ -437,11 +451,27 @@ panvk_compute_pipeline_create(struct panvk_device *dev, compute_pipeline->base.layout = layout; compute_pipeline->base.type = PANVK_PIPELINE_COMPUTE; + struct panvk_pool_properties bin_pool_props = { + .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE, + .slab_size = 4096, + .label = "Pipeline shader binaries", + .prealloc = false, + .owns_bos = true, + .needs_locking = false, + }; panvk_pool_init(&compute_pipeline->base.bin_pool, dev, NULL, - PAN_KMOD_BO_FLAG_EXECUTABLE, 4096, - "Pipeline shader binaries", false); - panvk_pool_init(&compute_pipeline->base.desc_pool, dev, NULL, 0, 4096, - "Pipeline static state", false); + &bin_pool_props); + + struct panvk_pool_properties desc_pool_props = { + .create_flags = 0, + .slab_size = 4096, + .label = "Pipeline static state", + .prealloc = false, + .owns_bos = true, + .needs_locking = false, + }; + panvk_pool_init(&compute_pipeline->base.desc_pool, dev, NULL, + &desc_pool_props); VkResult result = init_pipeline_shader(&compute_pipeline->base, &create_info->stage, alloc, diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c index 5ba507f2d09..198b9aefb2c 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c @@ -355,16 +355,41 @@ panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level, cmdbuf->vk.dynamic_graphics_state.ms.sample_locations = &cmdbuf->state.gfx.dynamic.sl; - panvk_pool_init(&cmdbuf->desc_pool, device, &pool->desc_bo_pool, 0, - 64 * 1024, "Command buffer descriptor pool", true); - panvk_pool_init( - &cmdbuf->tls_pool, device, &pool->tls_bo_pool, - panvk_debug_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP), 64 * 1024, - "TLS pool", false); - panvk_pool_init( - &cmdbuf->varying_pool, device, &pool->varying_bo_pool, - panvk_debug_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP), 64 * 1024, - "Varyings pool", false); + struct panvk_pool_properties desc_pool_props = { 
+ .create_flags = 0, + .slab_size = 64 * 1024, + .label = "Command buffer descriptor pool", + .prealloc = true, + .owns_bos = true, + .needs_locking = false, + }; + panvk_pool_init(&cmdbuf->desc_pool, device, &pool->desc_bo_pool, + &desc_pool_props); + + struct panvk_pool_properties tls_pool_props = { + .create_flags = + panvk_debug_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP), + .slab_size = 64 * 1024, + .label = "TLS pool", + .prealloc = false, + .owns_bos = true, + .needs_locking = false, + }; + panvk_pool_init(&cmdbuf->tls_pool, device, &pool->tls_bo_pool, + &tls_pool_props); + + struct panvk_pool_properties var_pool_props = { + .create_flags = + panvk_debug_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP), + .slab_size = 64 * 1024, + .label = "Varyings pool", + .prealloc = false, + .owns_bos = true, + .needs_locking = false, + }; + panvk_pool_init(&cmdbuf->varying_pool, device, &pool->varying_bo_pool, + &var_pool_props); + list_inithead(&cmdbuf->batches); *cmdbuf_out = &cmdbuf->vk; return VK_SUCCESS; diff --git a/src/panfrost/vulkan/jm/panvk_vX_meta.c b/src/panfrost/vulkan/jm/panvk_vX_meta.c index 7ed1cba63a1..45afeacc2ba 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_meta.c +++ b/src/panfrost/vulkan/jm/panvk_vX_meta.c @@ -50,10 +50,25 @@ panvk_per_arch(meta_emit_viewport)(struct pan_pool *pool, uint16_t minx, void panvk_per_arch(meta_init)(struct panvk_device *dev) { - panvk_pool_init(&dev->meta.bin_pool, dev, NULL, PAN_KMOD_BO_FLAG_EXECUTABLE, - 16 * 1024, "panvk_meta binary pool", false); - panvk_pool_init(&dev->meta.desc_pool, dev, NULL, 0, 16 * 1024, - "panvk_meta descriptor pool", false); + struct panvk_pool_properties bin_pool_props = { + .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE, + .slab_size = 16 * 1024, + .label = "panvk_meta binary pool", + .prealloc = false, + .owns_bos = true, + .needs_locking = false, + }; + struct panvk_pool_properties desc_pool_props = { + .create_flags = 0, + .slab_size = 16 * 1024, + .label = "panvk_meta descriptor pool", + 
.prealloc = false, + .owns_bos = true, + .needs_locking = false, + }; + + panvk_pool_init(&dev->meta.bin_pool, dev, NULL, &bin_pool_props); + panvk_pool_init(&dev->meta.desc_pool, dev, NULL, &desc_pool_props); panvk_per_arch(meta_blit_init)(dev); panvk_per_arch(meta_copy_init)(dev); panvk_per_arch(meta_clear_init)(dev); diff --git a/src/panfrost/vulkan/jm/panvk_vX_meta_blit.c b/src/panfrost/vulkan/jm/panvk_vX_meta_blit.c index 7280bb8db6d..3a8c06861a1 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_meta_blit.c +++ b/src/panfrost/vulkan/jm/panvk_vX_meta_blit.c @@ -234,12 +234,25 @@ panvk_per_arch(meta_blit_init)(struct panvk_device *dev) { struct panvk_physical_device *phys_dev = to_panvk_physical_device(dev->vk.physical); + struct panvk_pool_properties bin_pool_props = { + .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE, + .slab_size = 16 * 1024, + .label = "panvk_meta blitter binary pool", + .prealloc = false, + .owns_bos = true, + .needs_locking = false, + }; + struct panvk_pool_properties desc_pool_props = { + .create_flags = 0, + .slab_size = 16 * 1024, + .label = "panvk_meta blitter descriptor pool", + .prealloc = false, + .owns_bos = true, + .needs_locking = false, + }; - panvk_pool_init(&dev->meta.blitter.bin_pool, dev, NULL, - PAN_KMOD_BO_FLAG_EXECUTABLE, 16 * 1024, - "panvk_meta blitter binary pool", false); - panvk_pool_init(&dev->meta.blitter.desc_pool, dev, NULL, 0, 16 * 1024, - "panvk_meta blitter descriptor pool", false); + panvk_pool_init(&dev->meta.blitter.bin_pool, dev, NULL, &bin_pool_props); + panvk_pool_init(&dev->meta.blitter.desc_pool, dev, NULL, &desc_pool_props); pan_blend_shader_cache_init(&dev->meta.blend_shader_cache, phys_dev->kmod.props.gpu_prod_id); GENX(pan_blitter_cache_init) diff --git a/src/panfrost/vulkan/panvk_mempool.c b/src/panfrost/vulkan/panvk_mempool.c index 6cca98b451a..264a54c744a 100644 --- a/src/panfrost/vulkan/panvk_mempool.c +++ b/src/panfrost/vulkan/panvk_mempool.c @@ -53,8 +53,9 @@ panvk_bo_pool_cleanup(struct panvk_bo_pool 
*bo_pool) */ static struct panvk_priv_bo * -panvk_pool_alloc_backing(struct panvk_pool *pool, size_t bo_sz) +panvk_pool_alloc_backing(struct panvk_pool *pool, size_t sz) { + size_t bo_sz = ALIGN_POT(MAX2(pool->base.slab_size, sz), 4096); struct panvk_priv_bo *bo; /* If there's a free BO in our BO pool, let's pick it. */ @@ -70,64 +71,111 @@ panvk_pool_alloc_backing(struct panvk_pool *pool, size_t bo_sz) * flags to this function and keep the read/write, * fragment/vertex+tiler pools separate. */ - bo = panvk_priv_bo_create(pool->dev, bo_sz, pool->create_flags, + bo = panvk_priv_bo_create(pool->dev, bo_sz, pool->props.create_flags, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); } - if (pan_kmod_bo_size(bo->bo) == pool->base.slab_size) - list_addtail(&bo->node, &pool->bos); - else - list_addtail(&bo->node, &pool->big_bos); - pool->bo_count++; - pool->transient_bo = bo; - pool->transient_offset = 0; + if (pool->props.owns_bos) { + if (pan_kmod_bo_size(bo->bo) == pool->base.slab_size) + list_addtail(&bo->node, &pool->bos); + else + list_addtail(&bo->node, &pool->big_bos); + pool->bo_count++; + } + + size_t new_remaining_size = pan_kmod_bo_size(bo->bo) - sz; + size_t prev_remaining_size = + pool->transient_bo + ? pan_kmod_bo_size(pool->transient_bo->bo) - pool->transient_offset + : 0; + + /* If there's less room in the new BO after the allocation, we stick to the + * previous one. We also don't hold on BOs that are bigger than the pool + * allocation granularity, to avoid memory fragmentation (retaining a big + * BO which has just one tiny allocation active is not great). 
*/ + if (prev_remaining_size < new_remaining_size && + (pool->props.owns_bos || bo_sz <= pool->base.slab_size)) { + if (!pool->props.owns_bos) + panvk_priv_bo_unref(pool->transient_bo); + + pool->transient_bo = bo; + pool->transient_offset = 0; + } return bo; } +struct panvk_priv_mem +panvk_pool_alloc_mem(struct panvk_pool *pool, struct panvk_pool_alloc_info info) +{ + assert(info.alignment == util_next_power_of_two(info.alignment)); + + if (pool->props.needs_locking) + simple_mtx_lock(&pool->lock); + + /* Find or create a suitable BO */ + struct panvk_priv_bo *bo = pool->transient_bo; + unsigned offset = ALIGN_POT(pool->transient_offset, info.alignment); + + /* If we don't fit, allocate a new backing */ + if (unlikely(bo == NULL || (offset + info.size) >= pool->base.slab_size)) { + bo = panvk_pool_alloc_backing(pool, info.size); + offset = 0; + } + + if (pool->transient_bo == bo) { + pool->transient_offset = offset + info.size; + if (!pool->props.owns_bos) + panvk_priv_bo_ref(bo); + } + + struct panvk_priv_mem ret = { + .bo = bo, + .offset = offset, + }; + + if (pool->props.needs_locking) + simple_mtx_unlock(&pool->lock); + + return ret; +} + static struct panfrost_ptr panvk_pool_alloc_aligned(struct panvk_pool *pool, size_t sz, unsigned alignment) { - assert(alignment == util_next_power_of_two(alignment)); + /* We just return the host/dev address, so callers can't + * release the BO ref they acquired. 
*/ + assert(pool->props.owns_bos); - /* Find or create a suitable BO */ - struct panvk_priv_bo *bo = pool->transient_bo; - unsigned offset = ALIGN_POT(pool->transient_offset, alignment); - - /* If we don't fit, allocate a new backing */ - if (unlikely(bo == NULL || (offset + sz) >= pool->base.slab_size)) { - bo = panvk_pool_alloc_backing( - pool, ALIGN_POT(MAX2(pool->base.slab_size, sz), 4096)); - offset = 0; - } - - pool->transient_offset = offset + sz; - - struct panfrost_ptr ret = { - .cpu = bo->addr.host + offset, - .gpu = bo->addr.dev + offset, + struct panvk_pool_alloc_info info = { + .size = sz, + .alignment = alignment, }; + struct panvk_priv_mem mem = panvk_pool_alloc_mem(pool, info); - return ret; + return (struct panfrost_ptr){ + .cpu = panvk_priv_mem_host_addr(mem), + .gpu = panvk_priv_mem_dev_addr(mem), + }; } PAN_POOL_ALLOCATOR(struct panvk_pool, panvk_pool_alloc_aligned) void panvk_pool_init(struct panvk_pool *pool, struct panvk_device *dev, - struct panvk_bo_pool *bo_pool, unsigned create_flags, - size_t slab_size, const char *label, bool prealloc) + struct panvk_bo_pool *bo_pool, + const struct panvk_pool_properties *props) { memset(pool, 0, sizeof(*pool)); - pan_pool_init(&pool->base, slab_size); + pool->props = *props; + simple_mtx_init(&pool->lock, mtx_plain); + pan_pool_init(&pool->base, pool->props.slab_size); pool->dev = dev; - pool->create_flags = create_flags; - pool->label = label; pool->bo_pool = bo_pool; list_inithead(&pool->bos); list_inithead(&pool->big_bos); - if (prealloc) + if (props->prealloc) panvk_pool_alloc_backing(pool, pool->base.slab_size); } diff --git a/src/panfrost/vulkan/panvk_mempool.h b/src/panfrost/vulkan/panvk_mempool.h index 15bc7daf78d..f86dac63452 100644 --- a/src/panfrost/vulkan/panvk_mempool.h +++ b/src/panfrost/vulkan/panvk_mempool.h @@ -25,11 +25,12 @@ #ifndef __PANVK_POOL_H__ #define __PANVK_POOL_H__ +#include "panvk_priv_bo.h" + #include "pan_pool.h" #include "util/list.h" - -struct panvk_priv_bo; +#include 
"util/simple_mtx.h" struct panvk_bo_pool { struct list_head free_bos; @@ -43,6 +44,26 @@ panvk_bo_pool_init(struct panvk_bo_pool *bo_pool) void panvk_bo_pool_cleanup(struct panvk_bo_pool *bo_pool); +struct panvk_pool_properties { + /* BO flags to use in the pool */ + unsigned create_flags; + + /* Allocation granularity. */ + size_t slab_size; + + /* Label for created BOs */ + const char *label; + + /* When false, BOs allocated by the pool are not retained by the pool + * when they leave the transient_bo field. */ + bool owns_bos; + + /* If pool is shared and not externally protected, this should be true. */ + bool needs_locking; + + bool prealloc; +}; + /* Represents grow-only memory. It may be owned by the batch (OpenGL), or may be unowned for persistent uploads. */ @@ -53,11 +74,8 @@ struct panvk_pool { /* Parent device for allocation */ struct panvk_device *dev; - /* Label for created BOs */ - const char *label; - - /* BO flags to use in the pool */ - unsigned create_flags; + /* Pool properties. */ + struct panvk_pool_properties props; /* Before allocating a new BO, check if the BO pool has free BOs. * When returning BOs, if bo_pool != NULL, return them to this bo_pool. @@ -69,6 +87,9 @@ struct panvk_pool { struct list_head big_bos; unsigned bo_count; + /* Lock used to protect allocation when the pool is shared. 
*/ + simple_mtx_t lock; + /* Current transient BO */ struct panvk_priv_bo *transient_bo; @@ -83,8 +104,8 @@ to_panvk_pool(struct pan_pool *pool) } void panvk_pool_init(struct panvk_pool *pool, struct panvk_device *dev, - struct panvk_bo_pool *bo_pool, unsigned create_flags, - size_t slab_size, const char *label, bool prealloc); + struct panvk_bo_pool *bo_pool, + const struct panvk_pool_properties *props); void panvk_pool_reset(struct panvk_pool *pool); @@ -98,4 +119,84 @@ panvk_pool_num_bos(struct panvk_pool *pool) void panvk_pool_get_bo_handles(struct panvk_pool *pool, uint32_t *handles); +struct panvk_priv_mem { + struct panvk_priv_bo *bo; + unsigned offset; +}; + +static inline uint64_t +panvk_priv_mem_dev_addr(struct panvk_priv_mem mem) +{ + return mem.bo ? mem.bo->addr.dev + mem.offset : 0; +} + +static inline void * +panvk_priv_mem_host_addr(struct panvk_priv_mem mem) +{ + return mem.bo && mem.bo->addr.host + ? (uint8_t *)mem.bo->addr.host + mem.offset + : NULL; +} + +struct panvk_pool_alloc_info { + size_t size; + unsigned alignment; +}; + +static inline struct panvk_pool_alloc_info +panvk_pool_descs_to_alloc_info(const struct pan_desc_alloc_info *descs) +{ + struct panvk_pool_alloc_info alloc_info = { + .alignment = descs[0].align, + }; + + for (unsigned i = 0; descs[i].size; i++) + alloc_info.size += descs[i].size * descs[i].nelems; + + return alloc_info; +} + +struct panvk_priv_mem panvk_pool_alloc_mem(struct panvk_pool *pool, + struct panvk_pool_alloc_info info); + +static inline void +panvk_pool_free_mem(struct panvk_pool *pool, struct panvk_priv_mem mem) +{ + if (!pool->props.owns_bos) + panvk_priv_bo_unref(mem.bo); +} + +static inline struct panvk_priv_mem +panvk_pool_upload_aligned(struct panvk_pool *pool, const void *data, size_t sz, + unsigned alignment) +{ + struct panvk_pool_alloc_info info = { + .size = sz, + .alignment = alignment, + }; + + struct panvk_priv_mem mem = panvk_pool_alloc_mem(pool, info); + memcpy(panvk_priv_mem_host_addr(mem), 
data, sz); + return mem; +} + +static inline struct panvk_priv_mem +panvk_pool_upload(struct panvk_pool *pool, const void *data, size_t sz) +{ + return panvk_pool_upload_aligned(pool, data, sz, sz); +} + +#define panvk_pool_alloc_desc(pool, name) \ + panvk_pool_alloc_mem(pool, panvk_pool_descs_to_alloc_info( \ + PAN_DESC_AGGREGATE(PAN_DESC(name)))) + +#define panvk_pool_alloc_desc_array(pool, count, name) \ + panvk_pool_alloc_mem(pool, \ + panvk_pool_descs_to_alloc_info( \ + PAN_DESC_AGGREGATE(PAN_DESC_ARRAY(count, name)))) + +#define panvk_pool_alloc_desc_aggregate(pool, ...) \ + panvk_pool_alloc_mem( \ + pool, panvk_pool_descs_to_alloc_info(PAN_DESC_AGGREGATE(__VA_ARGS__))) + #endif diff --git a/src/panfrost/vulkan/panvk_vX_blend.c b/src/panfrost/vulkan/panvk_vX_blend.c index 285c698ba54..6903119f303 100644 --- a/src/panfrost/vulkan/panvk_vX_blend.c +++ b/src/panfrost/vulkan/panvk_vX_blend.c @@ -27,8 +27,15 @@ panvk_per_arch(blend_shader_cache_init)(struct panvk_device *dev) simple_mtx_init(&cache->lock, mtx_plain); - panvk_pool_init(&cache->bin_pool, dev, NULL, PAN_KMOD_BO_FLAG_EXECUTABLE, - 16 * 1024, "blend shaders", false); + struct panvk_pool_properties bin_pool_props = { + .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE, + .slab_size = 16 * 1024, + .label = "blend shaders", + .owns_bos = true, + .prealloc = false, + .needs_locking = false, + }; + panvk_pool_init(&cache->bin_pool, dev, NULL, &bin_pool_props); cache->ht = pan_blend_shader_key_table_create(NULL); if (!cache->ht)