From b5e47ba8941467a4e2f53907a6645a0703852e62 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Fri, 25 Jul 2025 15:35:45 +0000 Subject: [PATCH] pan/kmod: Add new helpers to sync BO CPU mappings pan_kmod_flush_bo_map_syncs() queues CPU-sync operations, and pan_kmod_flush_bo_map_syncs_locked() ensures all queued operations are flushed/executed. Those will be used when we start adding support for CPU-cached mappings. Reviewed-by: Boris Brezillon Reviewed-by: Christoph Pillmayer Part-of: --- src/panfrost/lib/kmod/pan_kmod.c | 104 +++++++++++++++++++++++ src/panfrost/lib/kmod/pan_kmod.h | 44 ++++++++++ src/panfrost/lib/kmod/pan_kmod_backend.h | 18 ++++ src/panfrost/lib/kmod/panfrost_kmod.c | 54 ++++++++++++ src/panfrost/lib/kmod/panthor_kmod.c | 57 ++++++++++++- 5 files changed, 276 insertions(+), 1 deletion(-) diff --git a/src/panfrost/lib/kmod/pan_kmod.c b/src/panfrost/lib/kmod/pan_kmod.c index a4370bb86d5..2546c496c87 100644 --- a/src/panfrost/lib/kmod/pan_kmod.c +++ b/src/panfrost/lib/kmod/pan_kmod.c @@ -7,9 +7,11 @@ #include #include +#include "util/cache_ops.h" #include "util/u_memory.h" #include "util/macros.h" #include "pan_kmod.h" +#include "pan_kmod_backend.h" extern const struct pan_kmod_ops panfrost_kmod_ops; extern const struct pan_kmod_ops panthor_kmod_ops; @@ -204,3 +206,105 @@ err_unlock: return NULL; } +void +pan_kmod_flush_bo_map_syncs_locked(struct pan_kmod_dev *dev) +{ + ASSERTED int ret = dev->ops->flush_bo_map_syncs(dev); + assert(!ret); + + util_dynarray_foreach(&dev->pending_bo_syncs.array, + struct pan_kmod_deferred_bo_sync, sync) + sync->bo->has_pending_deferred_syncs = false; + + util_dynarray_clear(&dev->pending_bo_syncs.array); +} + +void +pan_kmod_flush_bo_map_syncs(struct pan_kmod_dev *dev) +{ + if (dev->props.is_io_coherent) + return; + + /* Barrier to make sure all flush/invalidate requests are effective. 
*/ + if (p_atomic_xchg(&dev->pending_bo_syncs.user_cache_ops_pending, false)) + util_post_flush_inval_fence(); + + /* This can be racy, but that's fine, because we expect a future call to + * pan_kmod_flush_bo_map_syncs() if new ops are being added while we check + * this value. + */ + if (!util_dynarray_num_elements(&dev->pending_bo_syncs.array, + struct pan_kmod_deferred_bo_sync)) + return; + + simple_mtx_lock(&dev->pending_bo_syncs.lock); + pan_kmod_flush_bo_map_syncs_locked(dev); + simple_mtx_unlock(&dev->pending_bo_syncs.lock); +} + +/* Arbitrary limit for now. Pick something bigger or make it configurable if it + * becomes problematic. + */ +#define MAX_PENDING_SYNC_OPS 4096 + +void +pan_kmod_queue_bo_map_sync(struct pan_kmod_bo *bo, uint64_t bo_offset, + void *cpu_ptr, uint64_t range, + enum pan_kmod_bo_sync_type type) +{ + struct pan_kmod_dev *dev = bo->dev; + + /* Nothing to do if the buffer is IO coherent or if the BO is not mapped + * cacheable. + */ + if (!(bo->flags & PAN_KMOD_BO_FLAG_WB_MMAP) || + (bo->flags & PAN_KMOD_BO_FLAG_IO_COHERENT)) + return; + + /* If we have userspace cache flushing ops, use them instead of trapping + * through to the kernel. + */ + if (pan_kmod_can_sync_bo_map_from_userland(dev)) { + /* Pre-flush needs to be executed before each flush/inval operation, but + * we can batch the post flush/inval fence. util_pre_flush_fence() being + * a NOP on aarch64, it's effectively free there, but we keep it here for + * clarity (not sure we care about Mali on x86 to be honest :D). + */ + util_pre_flush_fence(); + + if (type == PAN_KMOD_BO_SYNC_CPU_CACHE_FLUSH) + util_flush_range_no_fence(cpu_ptr, range); + else + util_flush_inval_range_no_fence(cpu_ptr, range); + + /* The util_pre_flush_inval_fence() is inserted by + * pan_kmod_flush_bo_map_syncs() to avoid unnecessary serialization when + * flush/invalidate operations are batched. 
+ */ + p_atomic_set(&dev->pending_bo_syncs.user_cache_ops_pending, true); + return; + } + + simple_mtx_lock(&dev->pending_bo_syncs.lock); + + /* If we reach the limit, flush the pending ops before queuing new ones. */ + if (util_dynarray_num_elements(&dev->pending_bo_syncs.array, + struct pan_kmod_deferred_bo_sync) >= + MAX_PENDING_SYNC_OPS) + pan_kmod_flush_bo_map_syncs_locked(dev); + + uint64_t start = bo_offset & ~((uint64_t)util_cache_granularity() - 1); + uint64_t end = ALIGN_POT(bo_offset + range, util_cache_granularity()); + + struct pan_kmod_deferred_bo_sync new_sync = { + .bo = bo, + .start = start, + .size = end - start, + .type = type, + }; + + bo->has_pending_deferred_syncs = true; + util_dynarray_append(&dev->pending_bo_syncs.array, new_sync); + + simple_mtx_unlock(&dev->pending_bo_syncs.lock); +} diff --git a/src/panfrost/lib/kmod/pan_kmod.h b/src/panfrost/lib/kmod/pan_kmod.h index 145d4123916..34226a349a3 100644 --- a/src/panfrost/lib/kmod/pan_kmod.h +++ b/src/panfrost/lib/kmod/pan_kmod.h @@ -23,6 +23,7 @@ #include "drm-uapi/drm.h" +#include "util/cache_ops.h" #include "util/log.h" #include "util/macros.h" #include "util/os_file.h" @@ -31,6 +32,7 @@ #include "util/simple_mtx.h" #include "util/sparse_array.h" #include "util/u_atomic.h" +#include "util/u_dynarray.h" #include "util/perf/cpu_trace.h" #include "kmod/panthor_kmod.h" @@ -152,6 +154,9 @@ struct pan_kmod_bo { /* Combination of pan_kmod_bo_flags flags. */ uint32_t flags; + /* True if some deferred syncs targeting this BO are pending. */ + bool has_pending_deferred_syncs; + /* If non-NULL, the buffer object can only by mapped on this VM. Typical * the case for all internal/non-shareable buffers. The backend can * optimize things based on this information. Calling pan_kmod_bo_export() @@ -363,6 +368,9 @@ enum pan_kmod_dev_flags { * owned by the device, iff the device creation succeeded. */ PAN_KMOD_DEV_FLAG_OWNS_FD = (1 << 0), + + /* Force BO syncs through the kernel. 
*/ + PAN_KMOD_DEV_FLAG_MMAP_SYNC_THROUGH_KERNEL = (1 << 1), }; /* Encode a virtual address range. */ @@ -423,6 +431,9 @@ struct pan_kmod_ops { /* Get the file offset to use to mmap() a buffer object. */ off_t (*bo_get_mmap_offset)(struct pan_kmod_bo *bo); + /* Flush the pending BO map syncs. */ + int (*flush_bo_map_syncs)(struct pan_kmod_dev *dev); + /* Wait for a buffer object to be ready for read or read/write accesses. */ bool (*bo_wait)(struct pan_kmod_bo *bo, int64_t timeout_ns, bool for_read_only_access); @@ -485,6 +496,19 @@ pan_kmod_driver_version_at_least(const struct pan_kmod_driver *driver, return driver->version.minor >= minor; } +enum pan_kmod_bo_sync_type { + PAN_KMOD_BO_SYNC_CPU_CACHE_FLUSH, + PAN_KMOD_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE, +}; + +/* Used to queue BO sync operations. */ +struct pan_kmod_deferred_bo_sync { + struct pan_kmod_bo *bo; + uint64_t start; + uint64_t size; + enum pan_kmod_bo_sync_type type; +}; + /* Device object. */ struct pan_kmod_dev { /* FD attached to the device. */ @@ -513,6 +537,13 @@ struct pan_kmod_dev { simple_mtx_t lock; } handle_to_bo; + /* Pending BO syncs. */ + struct { + bool user_cache_ops_pending; + struct util_dynarray array; + simple_mtx_t lock; + } pending_bo_syncs; + /* Allocator attached to the device. 
*/ const struct pan_kmod_allocator *allocator; @@ -668,6 +699,19 @@ pan_kmod_bo_mmap(struct pan_kmod_bo *bo, off_t bo_offset, size_t size, int prot, return host_addr; } +static inline bool +pan_kmod_can_sync_bo_map_from_userland(struct pan_kmod_dev *dev) +{ + return util_has_cache_ops() && + !(dev->flags & PAN_KMOD_DEV_FLAG_MMAP_SYNC_THROUGH_KERNEL); +} + +void pan_kmod_queue_bo_map_sync(struct pan_kmod_bo *bo, uint64_t bo_offset, + void *cpu_ptr, uint64_t range, + enum pan_kmod_bo_sync_type type); + +void pan_kmod_flush_bo_map_syncs(struct pan_kmod_dev *dev); + static inline void pan_kmod_set_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const char *label) { diff --git a/src/panfrost/lib/kmod/pan_kmod_backend.h b/src/panfrost/lib/kmod/pan_kmod_backend.h index 04ec699e3c9..90df111844d 100644 --- a/src/panfrost/lib/kmod/pan_kmod_backend.h +++ b/src/panfrost/lib/kmod/pan_kmod_backend.h @@ -18,6 +18,8 @@ pan_kmod_dev_init(struct pan_kmod_dev *dev, int fd, uint32_t flags, simple_mtx_init(&dev->handle_to_bo.lock, mtx_plain); util_sparse_array_init(&dev->handle_to_bo.array, sizeof(struct pan_kmod_bo *), 512); + simple_mtx_init(&dev->pending_bo_syncs.lock, mtx_plain); + util_dynarray_init(&dev->pending_bo_syncs.array, NULL); dev->driver.version.major = version->version_major; dev->driver.version.minor = version->version_minor; dev->fd = fd; @@ -32,8 +34,10 @@ pan_kmod_dev_cleanup(struct pan_kmod_dev *dev) if (dev->flags & PAN_KMOD_DEV_FLAG_OWNS_FD) close(dev->fd); + util_dynarray_fini(&dev->pending_bo_syncs.array); util_sparse_array_finish(&dev->handle_to_bo.array); simple_mtx_destroy(&dev->handle_to_bo.lock); + simple_mtx_destroy(&dev->pending_bo_syncs.lock); } static inline void * @@ -93,6 +97,20 @@ pan_kmod_bo_init(struct pan_kmod_bo *bo, struct pan_kmod_dev *dev, p_atomic_set(&bo->refcnt, 1); } +void pan_kmod_flush_bo_map_syncs_locked(struct pan_kmod_dev *dev); + +static inline void +pan_kmod_bo_cleanup(struct pan_kmod_bo *bo) +{ + if 
(bo->has_pending_deferred_syncs) { + struct pan_kmod_dev *dev = bo->dev; + + simple_mtx_lock(&dev->pending_bo_syncs.lock); + pan_kmod_flush_bo_map_syncs_locked(dev); + simple_mtx_unlock(&dev->pending_bo_syncs.lock); + } +} + static inline void pan_kmod_vm_init(struct pan_kmod_vm *vm, struct pan_kmod_dev *dev, uint32_t handle, uint32_t flags, uint64_t pgsize_bitmap) diff --git a/src/panfrost/lib/kmod/panfrost_kmod.c b/src/panfrost/lib/kmod/panfrost_kmod.c index 049fbafab6f..c2484ee5710 100644 --- a/src/panfrost/lib/kmod/panfrost_kmod.c +++ b/src/panfrost/lib/kmod/panfrost_kmod.c @@ -12,6 +12,7 @@ #include "util/hash_table.h" #include "util/macros.h" #include "util/simple_mtx.h" +#include "util/stack_array.h" #include "drm-uapi/panfrost_drm.h" @@ -312,6 +313,7 @@ err_free_bo: static void panfrost_kmod_bo_free(struct pan_kmod_bo *bo) { + pan_kmod_bo_cleanup(bo); drmCloseBufferHandle(bo->dev->fd, bo->handle); pan_kmod_dev_free(bo->dev, bo); } @@ -338,6 +340,24 @@ panfrost_kmod_bo_import(struct pan_kmod_dev *dev, uint32_t handle, uint64_t size panfrost_bo->offset = get_bo_offset.offset; + if (pan_kmod_driver_version_at_least(&dev->driver, 1, 6)) { + struct drm_panfrost_query_bo_info args = { + .handle = handle, + }; + + ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_QUERY_BO_INFO, &args); + if (ret) { + mesa_loge("PANFROST_BO_QUERY_INFO failed (err=%d)", errno); + goto err_free_bo; + } + + /* If the BO comes from a different subsystem, we don't allow + * mmap() to avoid the CPU-sync churn. 
+ */ + if (args.extra_flags & DRM_PANFROST_BO_IS_IMPORTED) + flags |= PAN_KMOD_BO_FLAG_NO_MMAP; + } + pan_kmod_bo_init(&panfrost_bo->base, dev, NULL, size, flags | PAN_KMOD_BO_FLAG_IMPORTED, handle); return &panfrost_bo->base; @@ -380,6 +400,39 @@ panfrost_kmod_bo_wait(struct pan_kmod_bo *bo, int64_t timeout_ns, return false; } +static int +panfrost_kmod_flush_bo_map_syncs(struct pan_kmod_dev *dev) +{ + STACK_ARRAY(struct drm_panfrost_bo_sync_op, panfrost_ops, + util_dynarray_num_elements(&dev->pending_bo_syncs.array, + struct pan_kmod_deferred_bo_sync)); + + uint32_t panfrost_count = 0; + util_dynarray_foreach(&dev->pending_bo_syncs.array, + struct pan_kmod_deferred_bo_sync, sync) { + panfrost_ops[panfrost_count++] = (struct drm_panfrost_bo_sync_op){ + .handle = sync->bo->handle, + .type = sync->type == PAN_KMOD_BO_SYNC_CPU_CACHE_FLUSH + ? PANFROST_BO_SYNC_CPU_CACHE_FLUSH + : PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE, + .offset = sync->start, + .size = sync->size, + }; + } + + struct drm_panfrost_sync_bo req = { + .ops = (uintptr_t)panfrost_ops, + .op_count = panfrost_count, + }; + int ret = pan_kmod_ioctl(dev->fd, DRM_IOCTL_PANFROST_SYNC_BO, &req); + if (ret) + mesa_loge("DRM_IOCTL_PANFROST_BO_SYNC failed (err=%d)", errno); + + STACK_ARRAY_FINISH(panfrost_ops); + + return ret; +} + static void panfrost_kmod_bo_make_evictable(struct pan_kmod_bo *bo) { @@ -559,6 +612,7 @@ const struct pan_kmod_ops panfrost_kmod_ops = { .bo_import = panfrost_kmod_bo_import, .bo_get_mmap_offset = panfrost_kmod_bo_get_mmap_offset, .bo_wait = panfrost_kmod_bo_wait, + .flush_bo_map_syncs = panfrost_kmod_flush_bo_map_syncs, .bo_make_evictable = panfrost_kmod_bo_make_evictable, .bo_make_unevictable = panfrost_kmod_bo_make_unevictable, .vm_create = panfrost_kmod_vm_create, diff --git a/src/panfrost/lib/kmod/panthor_kmod.c b/src/panfrost/lib/kmod/panthor_kmod.c index d11d1be4339..dffb02ef06d 100644 --- a/src/panfrost/lib/kmod/panthor_kmod.c +++ b/src/panfrost/lib/kmod/panthor_kmod.c 
@@ -13,6 +13,7 @@ #include "util/libsync.h" #include "util/macros.h" #include "util/os_time.h" +#include "util/stack_array.h" #include "util/simple_mtx.h" #include "util/u_debug.h" #include "util/vma.h" @@ -413,6 +414,8 @@ panthor_kmod_bo_free(struct pan_kmod_bo *bo) struct panthor_kmod_bo *panthor_bo = container_of(bo, struct panthor_kmod_bo, base); + pan_kmod_bo_cleanup(bo); + if (!bo->exclusive_vm) drmSyncobjDestroy(bo->dev->fd, panthor_bo->sync.handle); @@ -424,6 +427,7 @@ static struct pan_kmod_bo * panthor_kmod_bo_import(struct pan_kmod_dev *dev, uint32_t handle, uint64_t size, uint32_t flags) { + int ret; struct panthor_kmod_bo *panthor_bo = pan_kmod_dev_alloc(dev, sizeof(*panthor_bo)); if (!panthor_bo) { @@ -431,10 +435,28 @@ panthor_kmod_bo_import(struct pan_kmod_dev *dev, uint32_t handle, uint64_t size, return NULL; } + if (pan_kmod_driver_version_at_least(&dev->driver, 1, 7)) { + struct drm_panthor_bo_query_info args = { + .handle = handle, + }; + + ret = drmIoctl(dev->fd, DRM_IOCTL_PANTHOR_BO_QUERY_INFO, &args); + if (ret) { + mesa_loge("PANTHOR_BO_QUERY_INFO failed (err=%d)", errno); + goto err_free_bo; + } + + /* If the BO comes from a different subsystem, we don't allow + * mmap() to avoid the CPU-sync churn. + */ + if (args.extra_flags & DRM_PANTHOR_BO_IS_IMPORTED) + flags |= PAN_KMOD_BO_FLAG_NO_MMAP; + } + /* Create a unsignalled syncobj on import. Will serve as a * temporary container for the exported dmabuf sync file. 
*/ - int ret = drmSyncobjCreate(dev->fd, 0, &panthor_bo->sync.handle); + ret = drmSyncobjCreate(dev->fd, 0, &panthor_bo->sync.handle); if (ret) { mesa_loge("drmSyncobjCreate() failed (err=%d)", errno); goto err_free_bo; @@ -582,6 +604,38 @@ panthor_kmod_bo_wait(struct pan_kmod_bo *bo, int64_t timeout_ns, } } +static int +panthor_kmod_flush_bo_map_syncs(struct pan_kmod_dev *dev) +{ + STACK_ARRAY(struct drm_panthor_bo_sync_op, panthor_ops, + util_dynarray_num_elements(&dev->pending_bo_syncs.array, + struct pan_kmod_deferred_bo_sync)); + + uint32_t panthor_count = 0; + util_dynarray_foreach(&dev->pending_bo_syncs.array, + struct pan_kmod_deferred_bo_sync, sync) { + panthor_ops[panthor_count++] = (struct drm_panthor_bo_sync_op){ + .handle = sync->bo->handle, + .type = sync->type == PAN_KMOD_BO_SYNC_CPU_CACHE_FLUSH + ? DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH + : DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE, + .offset = sync->start, + .size = sync->size, + }; + } + + struct drm_panthor_bo_sync req = { + .ops = DRM_PANTHOR_OBJ_ARRAY(panthor_count, panthor_ops), + }; + int ret = pan_kmod_ioctl(dev->fd, DRM_IOCTL_PANTHOR_BO_SYNC, &req); + if (ret) + mesa_loge("DRM_IOCTL_PANTHOR_BO_SYNC failed (err=%d)", errno); + + STACK_ARRAY_FINISH(panthor_ops); + + return ret; +} + /* Attach a sync to a buffer object. */ int panthor_kmod_bo_attach_sync_point(struct pan_kmod_bo *bo, uint32_t sync_handle, @@ -1255,6 +1309,7 @@ const struct pan_kmod_ops panthor_kmod_ops = { .bo_export = panthor_kmod_bo_export, .bo_get_mmap_offset = panthor_kmod_bo_get_mmap_offset, .bo_wait = panthor_kmod_bo_wait, + .flush_bo_map_syncs = panthor_kmod_flush_bo_map_syncs, .vm_create = panthor_kmod_vm_create, .vm_destroy = panthor_kmod_vm_destroy, .vm_bind = panthor_kmod_vm_bind,