pan/kmod: Add new helpers to sync BO CPU mappings

pan_kmod_queue_bo_map_sync() queues CPU-sync operations, and
pan_kmod_flush_bo_map_syncs()/pan_kmod_flush_bo_map_syncs_locked()
ensure all queued operations are flushed/executed. Those will be used
when we start adding support for CPU-cached mappings.

Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36385>
This commit is contained in:
Faith Ekstrand 2025-07-25 15:35:45 +00:00 committed by Boris Brezillon
parent af14c37bf1
commit b5e47ba894
5 changed files with 276 additions and 1 deletions

View file

@ -7,9 +7,11 @@
#include <string.h>
#include <xf86drm.h>
#include "util/cache_ops.h"
#include "util/u_memory.h"
#include "util/macros.h"
#include "pan_kmod.h"
#include "pan_kmod_backend.h"
extern const struct pan_kmod_ops panfrost_kmod_ops;
extern const struct pan_kmod_ops panthor_kmod_ops;
@ -204,3 +206,105 @@ err_unlock:
return NULL;
}
void
pan_kmod_flush_bo_map_syncs_locked(struct pan_kmod_dev *dev)
{
ASSERTED int ret = dev->ops->flush_bo_map_syncs(dev);
assert(!ret);
util_dynarray_foreach(&dev->pending_bo_syncs.array,
struct pan_kmod_deferred_bo_sync, sync)
sync->bo->has_pending_deferred_syncs = false;
util_dynarray_clear(&dev->pending_bo_syncs.array);
}
void
pan_kmod_flush_bo_map_syncs(struct pan_kmod_dev *dev)
{
if (dev->props.is_io_coherent)
return;
/* Barrier to make sure all flush/invalidate requests are effective. */
if (p_atomic_xchg(&dev->pending_bo_syncs.user_cache_ops_pending, false))
util_post_flush_inval_fence();
/* This can be racy, but that's fine, because we expect a future call to
* pan_kmod_flush_bo_map_syncs() if new ops are being added while we check
* this value.
*/
if (!util_dynarray_num_elements(&dev->pending_bo_syncs.array,
struct pan_kmod_deferred_bo_sync))
return;
simple_mtx_lock(&dev->pending_bo_syncs.lock);
pan_kmod_flush_bo_map_syncs_locked(dev);
simple_mtx_unlock(&dev->pending_bo_syncs.lock);
}
/* Arbitrary limit for now. Pick something bigger or make it configurable if it
 * becomes problematic.
 */
#define MAX_PENDING_SYNC_OPS 4096

/* Queue a CPU cache maintenance operation for a cacheable CPU mapping of a
 * buffer object.
 *
 * bo: target buffer object.
 * bo_offset: byte offset of the region inside the BO.
 * cpu_ptr: CPU address of the same region (only used for userland cache ops).
 * range: size of the region in bytes.
 * type: flush, or flush+invalidate (see enum pan_kmod_bo_sync_type).
 *
 * When userland cache maintenance is available, the operation is issued
 * immediately, minus the final fence which is deferred to
 * pan_kmod_flush_bo_map_syncs() so it can be batched. Otherwise the
 * operation is recorded in dev->pending_bo_syncs for a later kernel-side
 * flush.
 */
void
pan_kmod_queue_bo_map_sync(struct pan_kmod_bo *bo, uint64_t bo_offset,
void *cpu_ptr, uint64_t range,
enum pan_kmod_bo_sync_type type)
{
struct pan_kmod_dev *dev = bo->dev;
/* Nothing to do if the buffer is IO coherent or if the BO is not mapped
 * cacheable.
 */
if (!(bo->flags & PAN_KMOD_BO_FLAG_WB_MMAP) ||
(bo->flags & PAN_KMOD_BO_FLAG_IO_COHERENT))
return;
/* If we have userspace cache flushing ops, use them instead of trapping
 * through to the kernel.
 */
if (pan_kmod_can_sync_bo_map_from_userland(dev)) {
/* Pre-flush needs to be executed before each flush/inval operation, but
 * we can batch the post flush/inval fence. util_pre_flush_fence() being
 * a NOP on aarch64, it's effectively free there, but we keep it here for
 * clarity (not sure we care about Mali on x86 to be honest :D).
 */
util_pre_flush_fence();
if (type == PAN_KMOD_BO_SYNC_CPU_CACHE_FLUSH)
util_flush_range_no_fence(cpu_ptr, range);
else
util_flush_inval_range_no_fence(cpu_ptr, range);
/* The util_pre_flush_inval_fence() is inserted by
 * pan_kmod_flush_bo_map_syncs() to avoid unnecessary serialization when
 * flush/invalidate operations are batched.
 */
p_atomic_set(&dev->pending_bo_syncs.user_cache_ops_pending, true);
return;
}
/* Kernel-side path: record the operation under the queue lock. */
simple_mtx_lock(&dev->pending_bo_syncs.lock);
/* If we reach the limit, flush the pending ops before queuing new ones. */
if (util_dynarray_num_elements(&dev->pending_bo_syncs.array,
struct pan_kmod_deferred_bo_sync) >=
MAX_PENDING_SYNC_OPS)
pan_kmod_flush_bo_map_syncs_locked(dev);
/* Expand the region to CPU cache granularity boundaries (presumably what
 * the kernel operates on — NOTE(review): confirm the kernel clamps to the
 * BO size when the aligned end exceeds it).
 */
uint64_t start = bo_offset & ~((uint64_t)util_cache_granularity() - 1);
uint64_t end = ALIGN_POT(bo_offset + range, util_cache_granularity());
struct pan_kmod_deferred_bo_sync new_sync = {
.bo = bo,
.start = start,
.size = end - start,
.type = type,
};
/* Set while the lock is held, so pan_kmod_bo_cleanup() can reliably flush
 * before the BO goes away.
 */
bo->has_pending_deferred_syncs = true;
util_dynarray_append(&dev->pending_bo_syncs.array, new_sync);
simple_mtx_unlock(&dev->pending_bo_syncs.lock);
}

View file

@ -23,6 +23,7 @@
#include "drm-uapi/drm.h"
#include "util/cache_ops.h"
#include "util/log.h"
#include "util/macros.h"
#include "util/os_file.h"
@ -31,6 +32,7 @@
#include "util/simple_mtx.h"
#include "util/sparse_array.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"
#include "util/perf/cpu_trace.h"
#include "kmod/panthor_kmod.h"
@ -152,6 +154,9 @@ struct pan_kmod_bo {
/* Combination of pan_kmod_bo_flags flags. */
uint32_t flags;
/* True if some deferred syncs targeting this BO are pending. */
bool has_pending_deferred_syncs;
/* If non-NULL, the buffer object can only be mapped on this VM. Typically
 * the case for all internal/non-shareable buffers. The backend can
* optimize things based on this information. Calling pan_kmod_bo_export()
@ -363,6 +368,9 @@ enum pan_kmod_dev_flags {
* owned by the device, iff the device creation succeeded.
*/
PAN_KMOD_DEV_FLAG_OWNS_FD = (1 << 0),
/* Force BO syncs through the kernel. */
PAN_KMOD_DEV_FLAG_MMAP_SYNC_THROUGH_KERNEL = (1 << 1),
};
/* Encode a virtual address range. */
@ -423,6 +431,9 @@ struct pan_kmod_ops {
/* Get the file offset to use to mmap() a buffer object. */
off_t (*bo_get_mmap_offset)(struct pan_kmod_bo *bo);
/* Flush the pending BO map syncs. */
int (*flush_bo_map_syncs)(struct pan_kmod_dev *dev);
/* Wait for a buffer object to be ready for read or read/write accesses. */
bool (*bo_wait)(struct pan_kmod_bo *bo, int64_t timeout_ns,
bool for_read_only_access);
@ -485,6 +496,19 @@ pan_kmod_driver_version_at_least(const struct pan_kmod_driver *driver,
return driver->version.minor >= minor;
}
/* Type of CPU cache maintenance applied to a BO mapping. */
enum pan_kmod_bo_sync_type {
/* Flush CPU caches for the range. */
PAN_KMOD_BO_SYNC_CPU_CACHE_FLUSH,
/* Flush then invalidate CPU caches for the range. */
PAN_KMOD_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE,
};
/* Used to queue BO sync operations. */
struct pan_kmod_deferred_bo_sync {
/* Buffer object the sync applies to. */
struct pan_kmod_bo *bo;
/* Byte offset of the region to sync, relative to the BO start. */
uint64_t start;
/* Size of the region to sync, in bytes. */
uint64_t size;
/* Kind of cache maintenance to perform. */
enum pan_kmod_bo_sync_type type;
};
/* Device object. */
struct pan_kmod_dev {
/* FD attached to the device. */
@ -513,6 +537,13 @@ struct pan_kmod_dev {
simple_mtx_t lock;
} handle_to_bo;
/* Pending BO syncs. */
struct {
bool user_cache_ops_pending;
struct util_dynarray array;
simple_mtx_t lock;
} pending_bo_syncs;
/* Allocator attached to the device. */
const struct pan_kmod_allocator *allocator;
@ -668,6 +699,19 @@ pan_kmod_bo_mmap(struct pan_kmod_bo *bo, off_t bo_offset, size_t size, int prot,
return host_addr;
}
static inline bool
pan_kmod_can_sync_bo_map_from_userland(struct pan_kmod_dev *dev)
{
return util_has_cache_ops() &&
!(dev->flags & PAN_KMOD_DEV_FLAG_MMAP_SYNC_THROUGH_KERNEL);
}
void pan_kmod_queue_bo_map_sync(struct pan_kmod_bo *bo, uint64_t bo_offset,
void *cpu_ptr, uint64_t range,
enum pan_kmod_bo_sync_type type);
void pan_kmod_flush_bo_map_syncs(struct pan_kmod_dev *dev);
static inline void
pan_kmod_set_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const char *label)
{

View file

@ -18,6 +18,8 @@ pan_kmod_dev_init(struct pan_kmod_dev *dev, int fd, uint32_t flags,
simple_mtx_init(&dev->handle_to_bo.lock, mtx_plain);
util_sparse_array_init(&dev->handle_to_bo.array,
sizeof(struct pan_kmod_bo *), 512);
simple_mtx_init(&dev->pending_bo_syncs.lock, mtx_plain);
util_dynarray_init(&dev->pending_bo_syncs.array, NULL);
dev->driver.version.major = version->version_major;
dev->driver.version.minor = version->version_minor;
dev->fd = fd;
@ -32,8 +34,10 @@ pan_kmod_dev_cleanup(struct pan_kmod_dev *dev)
if (dev->flags & PAN_KMOD_DEV_FLAG_OWNS_FD)
close(dev->fd);
util_dynarray_fini(&dev->pending_bo_syncs.array);
util_sparse_array_finish(&dev->handle_to_bo.array);
simple_mtx_destroy(&dev->handle_to_bo.lock);
simple_mtx_destroy(&dev->pending_bo_syncs.lock);
}
static inline void *
@ -93,6 +97,20 @@ pan_kmod_bo_init(struct pan_kmod_bo *bo, struct pan_kmod_dev *dev,
p_atomic_set(&bo->refcnt, 1);
}
void pan_kmod_flush_bo_map_syncs_locked(struct pan_kmod_dev *dev);
/* To be called before a BO is destroyed: flush any deferred sync operations
 * still referencing the BO so the queue never holds a dangling pointer.
 */
static inline void
pan_kmod_bo_cleanup(struct pan_kmod_bo *bo)
{
   if (!bo->has_pending_deferred_syncs)
      return;

   struct pan_kmod_dev *dev = bo->dev;

   simple_mtx_lock(&dev->pending_bo_syncs.lock);
   pan_kmod_flush_bo_map_syncs_locked(dev);
   simple_mtx_unlock(&dev->pending_bo_syncs.lock);
}
static inline void
pan_kmod_vm_init(struct pan_kmod_vm *vm, struct pan_kmod_dev *dev,
uint32_t handle, uint32_t flags, uint64_t pgsize_bitmap)

View file

@ -12,6 +12,7 @@
#include "util/hash_table.h"
#include "util/macros.h"
#include "util/simple_mtx.h"
#include "util/stack_array.h"
#include "drm-uapi/panfrost_drm.h"
@ -312,6 +313,7 @@ err_free_bo:
/* Release a panfrost BO: flush any deferred syncs referencing it, close the
 * GEM handle, then free the backing allocation.
 */
static void
panfrost_kmod_bo_free(struct pan_kmod_bo *bo)
{
/* Must come first: the deferred sync queue may still reference this BO. */
pan_kmod_bo_cleanup(bo);
drmCloseBufferHandle(bo->dev->fd, bo->handle);
pan_kmod_dev_free(bo->dev, bo);
}
@ -338,6 +340,24 @@ panfrost_kmod_bo_import(struct pan_kmod_dev *dev, uint32_t handle, uint64_t size
panfrost_bo->offset = get_bo_offset.offset;
if (pan_kmod_driver_version_at_least(&dev->driver, 1, 6)) {
struct drm_panfrost_query_bo_info args = {
.handle = handle,
};
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_QUERY_BO_INFO, &args);
if (ret) {
mesa_loge("PANFROST_BO_QUERY_INFO failed (err=%d)", errno);
goto err_free_bo;
}
/* If the BO comes from a different subsystem, we don't allow
* mmap() to avoid the CPU-sync churn.
*/
if (args.extra_flags & DRM_PANFROST_BO_IS_IMPORTED)
flags |= PAN_KMOD_BO_FLAG_NO_MMAP;
}
pan_kmod_bo_init(&panfrost_bo->base, dev, NULL, size,
flags | PAN_KMOD_BO_FLAG_IMPORTED, handle);
return &panfrost_bo->base;
@ -380,6 +400,39 @@ panfrost_kmod_bo_wait(struct pan_kmod_bo *bo, int64_t timeout_ns,
return false;
}
/* Backend implementation of flush_bo_map_syncs for panfrost: translate the
 * device-agnostic deferred sync queue into drm_panfrost_bo_sync_op entries
 * and submit them in a single DRM_IOCTL_PANFROST_SYNC_BO call.
 *
 * Called with dev->pending_bo_syncs.lock held (the caller clears the queue).
 * Returns the ioctl result (0 on success).
 */
static int
panfrost_kmod_flush_bo_map_syncs(struct pan_kmod_dev *dev)
{
   STACK_ARRAY(struct drm_panfrost_bo_sync_op, panfrost_ops,
               util_dynarray_num_elements(&dev->pending_bo_syncs.array,
                                          struct pan_kmod_deferred_bo_sync));

   uint32_t panfrost_count = 0;
   util_dynarray_foreach(&dev->pending_bo_syncs.array,
                         struct pan_kmod_deferred_bo_sync, sync) {
      panfrost_ops[panfrost_count++] = (struct drm_panfrost_bo_sync_op){
         .handle = sync->bo->handle,
         .type = sync->type == PAN_KMOD_BO_SYNC_CPU_CACHE_FLUSH
                    ? PANFROST_BO_SYNC_CPU_CACHE_FLUSH
                    : PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE,
         .offset = sync->start,
         .size = sync->size,
      };
   }

   struct drm_panfrost_sync_bo req = {
      .ops = (uintptr_t)panfrost_ops,
      .op_count = panfrost_count,
   };

   int ret = pan_kmod_ioctl(dev->fd, DRM_IOCTL_PANFROST_SYNC_BO, &req);
   if (ret)
      /* Fixed: the message previously named a nonexistent
       * "DRM_IOCTL_PANFROST_BO_SYNC" ioctl.
       */
      mesa_loge("DRM_IOCTL_PANFROST_SYNC_BO failed (err=%d)", errno);

   STACK_ARRAY_FINISH(panfrost_ops);
   return ret;
}
static void
panfrost_kmod_bo_make_evictable(struct pan_kmod_bo *bo)
{
@ -559,6 +612,7 @@ const struct pan_kmod_ops panfrost_kmod_ops = {
.bo_import = panfrost_kmod_bo_import,
.bo_get_mmap_offset = panfrost_kmod_bo_get_mmap_offset,
.bo_wait = panfrost_kmod_bo_wait,
.flush_bo_map_syncs = panfrost_kmod_flush_bo_map_syncs,
.bo_make_evictable = panfrost_kmod_bo_make_evictable,
.bo_make_unevictable = panfrost_kmod_bo_make_unevictable,
.vm_create = panfrost_kmod_vm_create,

View file

@ -13,6 +13,7 @@
#include "util/libsync.h"
#include "util/macros.h"
#include "util/os_time.h"
#include "util/stack_array.h"
#include "util/simple_mtx.h"
#include "util/u_debug.h"
#include "util/vma.h"
@ -413,6 +414,8 @@ panthor_kmod_bo_free(struct pan_kmod_bo *bo)
struct panthor_kmod_bo *panthor_bo =
container_of(bo, struct panthor_kmod_bo, base);
pan_kmod_bo_cleanup(bo);
if (!bo->exclusive_vm)
drmSyncobjDestroy(bo->dev->fd, panthor_bo->sync.handle);
@ -424,6 +427,7 @@ static struct pan_kmod_bo *
panthor_kmod_bo_import(struct pan_kmod_dev *dev, uint32_t handle, uint64_t size,
uint32_t flags)
{
int ret;
struct panthor_kmod_bo *panthor_bo =
pan_kmod_dev_alloc(dev, sizeof(*panthor_bo));
if (!panthor_bo) {
@ -431,10 +435,28 @@ panthor_kmod_bo_import(struct pan_kmod_dev *dev, uint32_t handle, uint64_t size,
return NULL;
}
if (pan_kmod_driver_version_at_least(&dev->driver, 1, 7)) {
struct drm_panthor_bo_query_info args = {
.handle = handle,
};
ret = drmIoctl(dev->fd, DRM_IOCTL_PANTHOR_BO_QUERY_INFO, &args);
if (ret) {
mesa_loge("PANTHOR_BO_QUERY_INFO failed (err=%d)", errno);
goto err_free_bo;
}
/* If the BO comes from a different subsystem, we don't allow
* mmap() to avoid the CPU-sync churn.
*/
if (args.extra_flags & DRM_PANTHOR_BO_IS_IMPORTED)
flags |= PAN_KMOD_BO_FLAG_NO_MMAP;
}
/* Create a unsignalled syncobj on import. Will serve as a
* temporary container for the exported dmabuf sync file.
*/
int ret = drmSyncobjCreate(dev->fd, 0, &panthor_bo->sync.handle);
ret = drmSyncobjCreate(dev->fd, 0, &panthor_bo->sync.handle);
if (ret) {
mesa_loge("drmSyncobjCreate() failed (err=%d)", errno);
goto err_free_bo;
@ -582,6 +604,38 @@ panthor_kmod_bo_wait(struct pan_kmod_bo *bo, int64_t timeout_ns,
}
}
static int
panthor_kmod_flush_bo_map_syncs(struct pan_kmod_dev *dev)
{
STACK_ARRAY(struct drm_panthor_bo_sync_op, panthor_ops,
util_dynarray_num_elements(&dev->pending_bo_syncs.array,
struct pan_kmod_deferred_bo_sync));
uint32_t panthor_count = 0;
util_dynarray_foreach(&dev->pending_bo_syncs.array,
struct pan_kmod_deferred_bo_sync, sync) {
panthor_ops[panthor_count++] = (struct drm_panthor_bo_sync_op){
.handle = sync->bo->handle,
.type = sync->type == PAN_KMOD_BO_SYNC_CPU_CACHE_FLUSH
? DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH
: DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE,
.offset = sync->start,
.size = sync->size,
};
}
struct drm_panthor_bo_sync req = {
.ops = DRM_PANTHOR_OBJ_ARRAY(panthor_count, panthor_ops),
};
int ret = pan_kmod_ioctl(dev->fd, DRM_IOCTL_PANTHOR_BO_SYNC, &req);
if (ret)
mesa_loge("DRM_IOCTL_PANTHOR_BO_SYNC failed (err=%d)", errno);
STACK_ARRAY_FINISH(panthor_ops);
return ret;
}
/* Attach a sync to a buffer object. */
int
panthor_kmod_bo_attach_sync_point(struct pan_kmod_bo *bo, uint32_t sync_handle,
@ -1255,6 +1309,7 @@ const struct pan_kmod_ops panthor_kmod_ops = {
.bo_export = panthor_kmod_bo_export,
.bo_get_mmap_offset = panthor_kmod_bo_get_mmap_offset,
.bo_wait = panthor_kmod_bo_wait,
.flush_bo_map_syncs = panthor_kmod_flush_bo_map_syncs,
.vm_create = panthor_kmod_vm_create,
.vm_destroy = panthor_kmod_vm_destroy,
.vm_bind = panthor_kmod_vm_bind,