diff --git a/src/freedreno/drm/freedreno_bo.c b/src/freedreno/drm/freedreno_bo.c index f1c72543be6..84ede2e7f38 100644 --- a/src/freedreno/drm/freedreno_bo.c +++ b/src/freedreno/drm/freedreno_bo.c @@ -294,6 +294,35 @@ fd_bo_del(struct fd_bo *bo) simple_mtx_unlock(&table_lock); } +/** + * Cleanup fences, dropping pipe references. If 'expired' is true, only + * cleanup expired fences. + * + * Normally we expect at most a single fence, the exception being bo's + * shared between contexts + */ +static void +cleanup_fences(struct fd_bo *bo, bool expired) +{ + simple_mtx_assert_locked(&table_lock); + + for (int i = 0; i < bo->nr_fences; i++) { + struct fd_bo_fence *f = &bo->fences[i]; + + if (expired && fd_fence_before(f->pipe->control->fence, f->fence)) + continue; + + fd_pipe_del_locked(f->pipe); + bo->nr_fences--; + + if (bo->nr_fences > 0) { + /* Shuffle up the last entry to replace the current slot: */ + bo->fences[i] = bo->fences[bo->nr_fences]; + i--; + } + } +} + /* Called under table_lock */ void bo_del(struct fd_bo *bo) @@ -302,6 +331,9 @@ bo_del(struct fd_bo *bo) simple_mtx_assert_locked(&table_lock); + cleanup_fences(bo, false); + free(bo->fences); + if (bo->map) os_munmap(bo->map, bo->size); @@ -340,6 +372,7 @@ fd_bo_get_name(struct fd_bo *bo, uint32_t *name) set_name(bo, req.name); simple_mtx_unlock(&table_lock); bo->bo_reuse = NO_CACHE; + bo->shared = true; } *name = bo->name; @@ -351,6 +384,7 @@ uint32_t fd_bo_handle(struct fd_bo *bo) { bo->bo_reuse = NO_CACHE; + bo->shared = true; return bo->handle; } @@ -366,6 +400,7 @@ fd_bo_dmabuf(struct fd_bo *bo) } bo->bo_reuse = NO_CACHE; + bo->shared = true; return prime_fd; } @@ -402,11 +437,73 @@ fd_bo_map(struct fd_bo *bo) int fd_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op) { + if (op & FD_BO_PREP_NOSYNC) { + simple_mtx_lock(&table_lock); + enum fd_bo_state state = fd_bo_state(bo); + simple_mtx_unlock(&table_lock); + + switch (state) { + case FD_BO_STATE_IDLE: + return 0; + case 
FD_BO_STATE_BUSY: + return -EBUSY; + case FD_BO_STATE_UNKNOWN: + break; + } + } return bo->funcs->cpu_prep(bo, pipe, op); } void fd_bo_cpu_fini(struct fd_bo *bo) { - bo->funcs->cpu_fini(bo); +// TODO until we have cached buffers, the kernel side ioctl does nothing, +// so just skip it. When we have cached buffers, we can make the +// ioctl conditional +// bo->funcs->cpu_fini(bo); } + +void +fd_bo_add_fence(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t fence) +{ + simple_mtx_assert_locked(&table_lock); + + if (bo->nosync) + return; + + /* The common case is bo re-used on the same pipe it had previously + * been used on: + */ + for (int i = 0; i < bo->nr_fences; i++) { + struct fd_bo_fence *f = &bo->fences[i]; + if (f->pipe == pipe) { + assert(fd_fence_before(f->fence, fence)); + f->fence = fence; + return; + } + } + + cleanup_fences(bo, true); + + APPEND(bo, fences, (struct fd_bo_fence){ + .pipe = fd_pipe_ref_locked(pipe), + .fence = fence, + }); +} + +enum fd_bo_state +fd_bo_state(struct fd_bo *bo) +{ + simple_mtx_assert_locked(&table_lock); + + cleanup_fences(bo, true); + + if (bo->shared || bo->nosync) + return FD_BO_STATE_UNKNOWN; + + if (!bo->nr_fences) + return FD_BO_STATE_IDLE; + + return FD_BO_STATE_BUSY; +} + diff --git a/src/freedreno/drm/freedreno_bo_cache.c b/src/freedreno/drm/freedreno_bo_cache.c index e46d04acd21..6a028d9ae40 100644 --- a/src/freedreno/drm/freedreno_bo_cache.c +++ b/src/freedreno/drm/freedreno_bo_cache.c @@ -122,14 +122,6 @@ get_bucket(struct fd_bo_cache *cache, uint32_t size) return NULL; } -static int -is_idle(struct fd_bo *bo) -{ - return fd_bo_cpu_prep(bo, NULL, - FD_BO_PREP_READ | FD_BO_PREP_WRITE | - FD_BO_PREP_NOSYNC) == 0; -} - static struct fd_bo * find_in_bucket(struct fd_bo_bucket *bucket, uint32_t flags) { @@ -146,7 +138,7 @@ find_in_bucket(struct fd_bo_bucket *bucket, uint32_t flags) if (!list_is_empty(&bucket->list)) { bo = LIST_ENTRY(struct fd_bo, bucket->list.next, list); /* TODO check for compatible flags? 
*/ - if (is_idle(bo)) { + if (fd_bo_state(bo) == FD_BO_STATE_IDLE) { list_del(&bo->list); } else { bo = NULL; diff --git a/src/freedreno/drm/freedreno_drmif.h b/src/freedreno/drm/freedreno_drmif.h index 9f5058c2a4c..3dd3609ee15 100644 --- a/src/freedreno/drm/freedreno_drmif.h +++ b/src/freedreno/drm/freedreno_drmif.h @@ -63,6 +63,22 @@ enum fd_param_id { FD_GLOBAL_FAULTS, /* # of global (all context) faults */ }; +/** + * Helper for fence/seqno comparisons which deals properly with rollover. + * Returns true if fence 'a' is before fence 'b' + */ +static inline bool +fd_fence_before(uint32_t a, uint32_t b) +{ + return (int32_t)(a - b) < 0; +} + +static inline bool +fd_fence_after(uint32_t a, uint32_t b) +{ + return (int32_t)(a - b) > 0; +} + /* bo flags: */ #define FD_BO_GPUREADONLY BITSET_BIT(1) #define FD_BO_SCANOUT BITSET_BIT(2) @@ -106,6 +122,7 @@ struct fd_pipe *fd_pipe_new(struct fd_device *dev, enum fd_pipe_id id); struct fd_pipe *fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio); struct fd_pipe *fd_pipe_ref(struct fd_pipe *pipe); +struct fd_pipe *fd_pipe_ref_locked(struct fd_pipe *pipe); void fd_pipe_del(struct fd_pipe *pipe); int fd_pipe_get_param(struct fd_pipe *pipe, enum fd_param_id param, uint64_t *value); diff --git a/src/freedreno/drm/freedreno_pipe.c b/src/freedreno/drm/freedreno_pipe.c index b7e633f8f5e..9078c29fe11 100644 --- a/src/freedreno/drm/freedreno_pipe.c +++ b/src/freedreno/drm/freedreno_pipe.c @@ -60,6 +60,19 @@ fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio) fd_pipe_get_param(pipe, FD_GPU_ID, &val); pipe->gpu_id = val; + pipe->control_mem = fd_bo_new(dev, sizeof(*pipe->control), + 0, "pipe-control"); + pipe->control = fd_bo_map(pipe->control_mem); + + /* We don't want the control_mem bo to hold a reference to ourselves, + * so disable userspace fencing. This also means that we won't be able + * to determine if the buffer is idle which is needed by bo-cache.
But + * pipe creation/destroy is not a high frequency event so just disable + * the bo-cache as well: + */ + pipe->control_mem->nosync = true; + pipe->control_mem->bo_reuse = NO_CACHE; + return pipe; } @@ -72,16 +85,26 @@ fd_pipe_new(struct fd_device *dev, enum fd_pipe_id id) struct fd_pipe * fd_pipe_ref(struct fd_pipe *pipe) { - p_atomic_inc(&pipe->refcnt); + simple_mtx_lock(&table_lock); + fd_pipe_ref_locked(pipe); + simple_mtx_unlock(&table_lock); + return pipe; +} + +struct fd_pipe * +fd_pipe_ref_locked(struct fd_pipe *pipe) +{ + simple_mtx_assert_locked(&table_lock); + pipe->refcnt++; return pipe; } void fd_pipe_del(struct fd_pipe *pipe) { - if (!p_atomic_dec_zero(&pipe->refcnt)) - return; - pipe->funcs->destroy(pipe); + simple_mtx_lock(&table_lock); + fd_pipe_del_locked(pipe); + simple_mtx_unlock(&table_lock); } void @@ -90,6 +113,7 @@ fd_pipe_del_locked(struct fd_pipe *pipe) simple_mtx_assert_locked(&table_lock); if (!p_atomic_dec_zero(&pipe->refcnt)) return; + fd_bo_del_locked(pipe->control_mem); pipe->funcs->destroy(pipe); } @@ -108,5 +132,26 @@ fd_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp) int fd_pipe_wait_timeout(struct fd_pipe *pipe, uint32_t timestamp, uint64_t timeout) { + return pipe->funcs->wait(pipe, timestamp, timeout); } + +uint32_t +fd_pipe_emit_fence(struct fd_pipe *pipe, struct fd_ringbuffer *ring) +{ + uint32_t fence = ++pipe->last_fence; + + if (pipe->gpu_id >= 500) { + OUT_PKT7(ring, CP_EVENT_WRITE, 4); + OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS)); + OUT_RELOC(ring, control_ptr(pipe, fence)); /* ADDR_LO/HI */ + OUT_RING(ring, fence); + } else { + OUT_PKT3(ring, CP_EVENT_WRITE, 3); + OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS)); + OUT_RELOC(ring, control_ptr(pipe, fence)); /* ADDR */ + OUT_RING(ring, fence); + } + + return fence; +} diff --git a/src/freedreno/drm/freedreno_priv.h b/src/freedreno/drm/freedreno_priv.h index d20cc7822db..21dcb7d9110 100644 --- a/src/freedreno/drm/freedreno_priv.h +++ 
b/src/freedreno/drm/freedreno_priv.h @@ -151,14 +151,39 @@ struct fd_pipe_funcs { void (*destroy)(struct fd_pipe *pipe); }; +struct fd_pipe_control { + uint32_t fence; +}; +#define control_ptr(pipe, member) \ + (pipe)->control_mem, offsetof(struct fd_pipe_control, member), 0, 0 + struct fd_pipe { struct fd_device *dev; enum fd_pipe_id id; uint32_t gpu_id; + + /** + * Note refcnt is *not* atomic, but protected by table_lock, since the + * table_lock is held in fd_bo_add_fence(), which is the hotpath. + */ int32_t refcnt; + + /** + * Previous fence seqno allocated for this pipe. The fd_pipe represents + * a single timeline, fences allocated by this pipe can be compared to + * each other, but fences from different pipes are not comparable (as + * there could be preemption of multiple priority level submitqueues at + * play) + */ + uint32_t last_fence; + struct fd_bo *control_mem; + volatile struct fd_pipe_control *control; + const struct fd_pipe_funcs *funcs; }; +uint32_t fd_pipe_emit_fence(struct fd_pipe *pipe, struct fd_ringbuffer *ring); + struct fd_submit_funcs { struct fd_ringbuffer *(*new_ringbuffer)(struct fd_submit *submit, uint32_t size, @@ -173,6 +198,7 @@ struct fd_submit { const struct fd_submit_funcs *funcs; struct fd_ringbuffer *primary; + uint32_t fence; }; struct fd_bo_funcs { @@ -185,6 +211,15 @@ struct fd_bo_funcs { void (*destroy)(struct fd_bo *bo); }; +struct fd_bo_fence { + /* For non-shared buffers, track the last pipe the buffer was active + * on, and the per-pipe fence value that indicates when the buffer is + * idle: + */ + uint32_t fence; + struct fd_pipe *pipe; +}; + struct fd_bo { struct fd_device *dev; uint32_t size; @@ -200,12 +235,35 @@ struct fd_bo { NO_CACHE = 0, BO_CACHE = 1, RING_CACHE = 2, - } bo_reuse; + } bo_reuse : 2; + + /* Buffers that are shared (imported or exported) may be used in + * other processes, so we need to fallback to kernel to determine + * busyness. 
+ */ + bool shared : 1; + + /* We need to be able to disable userspace fence synchronization for + * special internal buffers, namely the pipe->control buffer, to avoid + * a circular reference loop. + */ + bool nosync : 1; struct list_head list; /* bucket-list entry */ time_t free_time; /* time when added to bucket-list */ + + DECLARE_ARRAY(struct fd_bo_fence, fences); }; +void fd_bo_add_fence(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t fence); + +enum fd_bo_state { + FD_BO_STATE_IDLE, + FD_BO_STATE_BUSY, + FD_BO_STATE_UNKNOWN, +}; +enum fd_bo_state fd_bo_state(struct fd_bo *bo); + struct fd_bo *fd_bo_new_ring(struct fd_device *dev, uint32_t size); #define enable_debug 0 /* TODO make dynamic */ diff --git a/src/freedreno/drm/freedreno_ringbuffer.c b/src/freedreno/drm/freedreno_ringbuffer.c index 291bd74e61c..2e3000b5fd7 100644 --- a/src/freedreno/drm/freedreno_ringbuffer.c +++ b/src/freedreno/drm/freedreno_ringbuffer.c @@ -48,7 +48,7 @@ int fd_submit_flush(struct fd_submit *submit, int in_fence_fd, int *out_fence_fd, uint32_t *out_fence) { - debug_assert(submit->primary); + submit->fence = fd_pipe_emit_fence(submit->pipe, submit->primary); return submit->funcs->flush(submit, in_fence_fd, out_fence_fd, out_fence); } diff --git a/src/freedreno/drm/msm_ringbuffer.c b/src/freedreno/drm/msm_ringbuffer.c index 903997204bf..5ce4c0f4185 100644 --- a/src/freedreno/drm/msm_ringbuffer.c +++ b/src/freedreno/drm/msm_ringbuffer.c @@ -343,6 +343,12 @@ msm_submit_flush(struct fd_submit *submit, int in_fence_fd, int *out_fence_fd, } } + simple_mtx_lock(&table_lock); + for (unsigned j = 0; j < msm_submit->nr_bos; j++) { + fd_bo_add_fence(msm_submit->bos[j], submit->pipe, submit->fence); + } + simple_mtx_unlock(&table_lock); + if (in_fence_fd != -1) { req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; req.fence_fd = in_fence_fd; diff --git a/src/freedreno/drm/msm_ringbuffer_sp.c b/src/freedreno/drm/msm_ringbuffer_sp.c index 71e9af60692..a2a00ee7022 100644 --- 
a/src/freedreno/drm/msm_ringbuffer_sp.c +++ b/src/freedreno/drm/msm_ringbuffer_sp.c @@ -256,11 +256,16 @@ msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, } else { submit_bos = malloc(msm_submit->nr_bos * sizeof(submit_bos[0])); } + + simple_mtx_lock(&table_lock); for (unsigned i = 0; i < msm_submit->nr_bos; i++) { submit_bos[i].flags = msm_submit->bos[i]->flags; submit_bos[i].handle = msm_submit->bos[i]->handle; submit_bos[i].presumed = 0; + fd_bo_add_fence(msm_submit->bos[i], submit->pipe, submit->fence); } + simple_mtx_unlock(&table_lock); + req.bos = VOID2U64(submit_bos), req.nr_bos = msm_submit->nr_bos; req.cmds = VOID2U64(cmds), req.nr_cmds = primary->u.nr_cmds;