From 7e250033eef2111aaf33d4e92ad92408da87b5e1 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Thu, 11 Jul 2024 10:41:46 -0700
Subject: [PATCH] freedreno/bc: Rework flush order

A sequence like:

 1) set_framebuffer_state(A)
 2) clears and/or draws
 3) set_framebuffer_state(B)
 4) context_flush(ASYNC)

would result in the fence for batch B being returned instead of the
fence for batch A, i.e. a fence that signals too early.

Instead, in context_flush(), find the most recently modified batch, so
that in the example above batch A is the one that gets flushed.

There is one wrinkle: we want to avoid a dependency loop, so the
'last_batch' can actually be a batch that depends on the most recently
modified batch.  In either case, the batch used for the returned fence
is one that directly or indirectly depends on all other batches
associated with the context.

Signed-off-by: Rob Clark
Part-of:
---
 .../drivers/freedreno/freedreno_batch.h       |   4 +
 .../drivers/freedreno/freedreno_batch_cache.c | 127 ++++++++++--------
 .../drivers/freedreno/freedreno_batch_cache.h |   3 +-
 .../drivers/freedreno/freedreno_context.c     |  21 ++-
 .../drivers/freedreno/freedreno_context.h     |   9 ++
 5 files changed, 96 insertions(+), 68 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h
index b419d8ec690..42d5a23741a 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -110,6 +110,9 @@ struct fd_batch {
 
    struct fd_context *ctx;
 
+   /* Update seqno of the most recent draw/etc. to this batch. */
+   uint32_t update_seqno;
+
    /* do we need to mem2gmem before rendering.  We don't, if for example,
     * there was a glClear() that invalidated the entire previous buffer
     * contents.  Keep track of which buffer(s) are cleared, or needs
@@ -398,6 +401,7 @@ static inline void
 fd_batch_needs_flush(struct fd_batch *batch)
 {
    batch->needs_flush = true;
+   batch->update_seqno = ++batch->ctx->update_count;
    fd_pipe_fence_ref(&batch->ctx->last_fence, NULL);
 }
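The update_seqno mechanism above relies on a rollover-safe ordering
comparison between 32-bit sequence numbers.  A minimal standalone sketch
of how such a comparison works; seqno_before() is illustrative, assuming
fd_fence_before() uses the usual signed-difference trick, and is not the
driver's verbatim code:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Wraparound-safe "a happens before b" comparison for monotonically
 * increasing 32-bit sequence numbers.  Casting the difference to signed
 * handles rollover as long as the two values are less than 2^31 apart.
 */
static bool
seqno_before(uint32_t a, uint32_t b)
{
   return (int32_t)(a - b) < 0;
}

int
main(void)
{
   uint32_t count = 0;

   uint32_t batch_a = ++count;   /* draw recorded to batch A */
   uint32_t batch_b = ++count;   /* later draw recorded to batch B */

   assert(seqno_before(batch_a, batch_b));

   /* Ordering stays correct when the counter rolls over: */
   assert(seqno_before(UINT32_MAX, 0u));
   return 0;
}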
diff --git a/src/gallium/drivers/freedreno/freedreno_batch_cache.c b/src/gallium/drivers/freedreno/freedreno_batch_cache.c
index 5198cbb6cfa..7381188f8a3 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch_cache.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch_cache.c
@@ -141,9 +141,61 @@ fd_bc_fini(struct fd_batch_cache *cache)
    _mesa_hash_table_destroy(cache->ht, NULL);
 }
 
-/* Flushes all batches in the batch cache.  Used at glFlush() and similar times. */
+/* Find a batch that depends on last_batch (recursively, if needed).
+ * The returned batch should not be depended on by any other batch.
+ */
+static struct fd_batch *
+find_dependee(struct fd_context *ctx, struct fd_batch *last_batch)
+   assert_dt
+{
+   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
+   struct fd_batch *batch;
+
+   foreach_batch (batch, cache, cache->batch_mask) {
+      if (batch->ctx == ctx && fd_batch_has_dep(batch, last_batch)) {
+         fd_batch_reference_locked(&last_batch, batch);
+         return find_dependee(ctx, last_batch);
+      }
+   }
+
+   return last_batch;
+}
+
+/* This returns the last batch to be flushed.  This is _approximately_ the
+ * last batch to be modified, but it could also be a batch that depends on
+ * the last modified batch.
+ */
+struct fd_batch *
+fd_bc_last_batch(struct fd_context *ctx)
+{
+   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
+   struct fd_batch *batch, *last_batch = NULL;
+
+   fd_screen_lock(ctx->screen);
+
+   foreach_batch (batch, cache, cache->batch_mask) {
+      if (batch->ctx == ctx) {
+         if (!last_batch ||
+             /* Note: fd_fence_before() handles rollover for us: */
+             fd_fence_before(last_batch->update_seqno, batch->update_seqno)) {
+            fd_batch_reference_locked(&last_batch, batch);
+         }
+      }
+   }
+
+   if (last_batch)
+      last_batch = find_dependee(ctx, last_batch);
+
+   fd_screen_unlock(ctx->screen);
+
+   return last_batch;
+}
+
+/* Make last_batch depend on all other batches from the same context, so
+ * that all the other batches get flushed before it.
+ */
 void
-fd_bc_flush(struct fd_context *ctx, bool deferred) assert_dt
+fd_bc_add_flush_deps(struct fd_context *ctx, struct fd_batch *last_batch)
 {
    struct fd_batch_cache *cache = &ctx->screen->batch_cache;
 
@@ -155,6 +207,14 @@ fd_bc_flush(struct fd_context *ctx, bool deferred) assert_dt
    struct fd_batch *batch;
    unsigned n = 0;
 
+   assert(last_batch->ctx == ctx);
+
+#ifndef NDEBUG
+   struct fd_batch *tmp = fd_bc_last_batch(ctx);
+   assert(tmp == last_batch);
+   fd_batch_reference(&tmp, NULL);
+#endif
+
    fd_screen_lock(ctx->screen);
 
    foreach_batch (batch, cache, cache->batch_mask) {
@@ -163,64 +223,19 @@ fd_bc_flush(struct fd_context *ctx, bool deferred) assert_dt
       }
    }
 
-   /* deferred flush doesn't actually flush, but it marks every other
-    * batch associated with the context as dependent on the current
-    * batch.  So when the current batch gets flushed, all other batches
-    * that came before also get flushed.
-    */
-   if (deferred) {
-      struct fd_batch *current_batch = fd_context_batch(ctx);
-      struct fd_batch *deps[ARRAY_SIZE(cache->batches)] = {0};
-      unsigned ndeps = 0;
+   for (unsigned i = 0; i < n; i++) {
+      if (batches[i] && (batches[i] != last_batch)) {
+         /* fd_bc_last_batch() should ensure that no other batch depends
+          * on last_batch.  This is needed to avoid a dependency loop.
+          */
+         assert(!fd_batch_has_dep(batches[i], last_batch));
 
-      /* To avoid a dependency loop, pull out any batches that already
-       * have a dependency on the current batch.  This ensures the
-       * following loop adding a dependency to the current_batch, all
-       * remaining batches do not have a direct or indirect dependency
-       * on the current_batch.
-       *
-       * The batches that have a dependency on the current batch will
-       * be flushed immediately (after dropping screen lock) instead
-       */
-      for (unsigned i = 0; i < n; i++) {
-         if ((batches[i] != current_batch) &&
-             fd_batch_has_dep(batches[i], current_batch)) {
-            /* We can't immediately flush while we hold the screen lock,
-             * but that doesn't matter.  We just want to skip adding any
-             * deps that would result in a loop, we can flush after we've
-             * updated the dependency graph and dropped the lock.
-             */
-            fd_batch_reference_locked(&deps[ndeps++], batches[i]);
-            fd_batch_reference_locked(&batches[i], NULL);
-         }
-      }
-
-      for (unsigned i = 0; i < n; i++) {
-         if (batches[i] && (batches[i] != current_batch) &&
-             (batches[i]->ctx == current_batch->ctx)) {
-            fd_batch_add_dep(current_batch, batches[i]);
-         }
-      }
-
-      fd_batch_reference_locked(&current_batch, NULL);
-
-      fd_screen_unlock(ctx->screen);
-
-      /* If we have any batches that we could add a dependency on (unlikely)
-       * flush them immediately.
-       */
-      for (unsigned i = 0; i < ndeps; i++) {
-         fd_batch_flush(deps[i]);
-         fd_batch_reference(&deps[i], NULL);
-      }
-   } else {
-      fd_screen_unlock(ctx->screen);
-
-      for (unsigned i = 0; i < n; i++) {
-         fd_batch_flush(batches[i]);
+         fd_batch_add_dep(last_batch, batches[i]);
       }
    }
 
+   fd_screen_unlock(ctx->screen);
+
    for (unsigned i = 0; i < n; i++) {
       fd_batch_reference(&batches[i], NULL);
    }
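To see why find_dependee() is needed, suppose batch 2 already depends on
batch 0 (e.g. batch 0 renders to a texture that batch 2 samples from) and
batch 0 is the most recently modified one.  Making batch 0 also depend on
batch 2 would create a cycle, so the fence is attached to batch 2 instead.
A toy model of the walk, with hypothetical names and without the locking
and refcounting of the real code:

#include <stdbool.h>
#include <stdio.h>

#define NUM_BATCHES 3

/* Toy dependency matrix: dep[i][j] is true if batch i depends on batch j.
 * Here batch 2 already depends on batch 0.
 */
static bool dep[NUM_BATCHES][NUM_BATCHES] = {
   [2] = { [0] = true },
};

/* Walk "upwards" from last: if any batch depends on it, that batch must
 * be flushed later, so it becomes the new candidate.  This mirrors the
 * shape of find_dependee() above.
 */
static int
find_dependee(int last)
{
   for (int i = 0; i < NUM_BATCHES; i++) {
      if (dep[i][last])
         return find_dependee(i);
   }
   return last;
}

int
main(void)
{
   /* Batch 0 was modified most recently, but batch 2 depends on it, so
    * batch 2 is the one that must carry the flush fence.
    */
   printf("last batch to flush: %d\n", find_dependee(0));  /* prints 2 */
   return 0;
}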
- */ - for (unsigned i = 0; i < ndeps; i++) { - fd_batch_flush(deps[i]); - fd_batch_reference(&deps[i], NULL); - } - } else { - fd_screen_unlock(ctx->screen); - - for (unsigned i = 0; i < n; i++) { - fd_batch_flush(batches[i]); + fd_batch_add_dep(last_batch, batches[i]); } } + fd_screen_unlock(ctx->screen); + for (unsigned i = 0; i < n; i++) { fd_batch_reference(&batches[i], NULL); } diff --git a/src/gallium/drivers/freedreno/freedreno_batch_cache.h b/src/gallium/drivers/freedreno/freedreno_batch_cache.h index e118955676f..be52ee2385e 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch_cache.h +++ b/src/gallium/drivers/freedreno/freedreno_batch_cache.h @@ -68,7 +68,8 @@ struct fd_batch_cache { void fd_bc_init(struct fd_batch_cache *cache); void fd_bc_fini(struct fd_batch_cache *cache); -void fd_bc_flush(struct fd_context *ctx, bool deferred) assert_dt; +struct fd_batch *fd_bc_last_batch(struct fd_context *ctx) assert_dt; +void fd_bc_add_flush_deps(struct fd_context *ctx, struct fd_batch *last_batch) assert_dt; void fd_bc_flush_writer(struct fd_context *ctx, struct fd_resource *rsc) assert_dt; void fd_bc_flush_readers(struct fd_context *ctx, struct fd_resource *rsc) assert_dt; void fd_bc_dump(struct fd_context *ctx, const char *fmt, ...) diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index 6bd25687d7c..ea2a45a2a3c 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -47,21 +47,13 @@ fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fencep, { struct fd_context *ctx = fd_context(pctx); struct pipe_fence_handle *fence = NULL; - struct fd_batch *batch = NULL; - - /* We want to lookup current batch if it exists, but not create a new - * one if not (unless we need a fence) - */ - fd_batch_reference(&batch, ctx->batch); + struct fd_batch *batch = fd_bc_last_batch(ctx); DBG("%p: %p: flush: flags=%x, fencep=%p", ctx, batch, flags, fencep); if (fencep && !batch) { batch = fd_context_batch(ctx); } else if (!batch) { - if (ctx->screen->reorder) - fd_bc_flush(ctx, flags & PIPE_FLUSH_DEFERRED); - fd_bc_dump(ctx, "%p: NULL batch, remaining:\n", ctx); return; } @@ -134,7 +126,9 @@ fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fencep, if (!ctx->screen->reorder) { fd_batch_flush(batch); } else { - fd_bc_flush(ctx, flags & PIPE_FLUSH_DEFERRED); + fd_bc_add_flush_deps(ctx, batch); + if (!(flags & PIPE_FLUSH_DEFERRED)) + fd_batch_flush(batch); } fd_bc_dump(ctx, "%p: remaining:\n", ctx); @@ -406,7 +400,12 @@ fd_context_destroy(struct pipe_context *pctx) fd_batch_reference(&ctx->batch, NULL); /* unref current batch */ /* Make sure nothing in the batch cache references our context any more. */ - fd_bc_flush(ctx, false); + struct fd_batch *batch = fd_bc_last_batch(ctx); + if (batch) { + fd_bc_add_flush_deps(ctx, batch); + fd_batch_flush(batch); + fd_batch_reference(&batch, NULL); + } fd_prog_fini(pctx); diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 4e61e5a6792..44301696abf 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -381,6 +381,15 @@ struct fd_context { */ struct pipe_fence_handle *last_fence dt; + /* + * Counter to keep track of batch's most recent update. Ie. 
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 4e61e5a6792..44301696abf 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -381,6 +381,15 @@ struct fd_context {
     */
    struct pipe_fence_handle *last_fence dt;
 
+   /*
+    * Counter to keep track of batches' most recent updates, ie. the batch
+    * with the higher update_seqno is the one that has most recently been
+    * drawn/etc. to (and therefore there should be no other batch that
+    * needs to be flushed after it).  This is used to figure out which
+    * fence to use at context flush time.
+    */
+   uint32_t update_count;
+
    /* Fence fd we are told to wait on via ->fence_server_sync() (or -1
    * if none).  The in-fence is transferred over to the batch on the
    * next draw/blit/grid.
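Taken together, the reordering flush path now reduces to roughly the
following shape.  This is a simplified paraphrase of the patch for
illustration, assuming the driver-internal headers, not the verbatim
driver code:

#include "freedreno_batch_cache.h"
#include "freedreno_context.h"

/* Simplified paraphrase of the reworked fd_context_flush() flow for the
 * reordering (ctx->screen->reorder) path; locking, fence bookkeeping,
 * and the non-reorder path are omitted.
 */
static void
flush_reordered(struct fd_context *ctx, unsigned flags)
{
   /* The batch with the highest update_seqno, or a batch that directly
    * or indirectly depends on it:
    */
   struct fd_batch *batch = fd_bc_last_batch(ctx);
   if (!batch)
      return;

   /* Make every other batch of this context a dependency of 'batch', so
    * that flushing it flushes everything that came before:
    */
   fd_bc_add_flush_deps(ctx, batch);

   /* A deferred flush only records the dependencies; the actual flush
    * happens later, e.g. when the fence is waited on:
    */
   if (!(flags & PIPE_FLUSH_DEFERRED))
      fd_batch_flush(batch);

   fd_batch_reference(&batch, NULL);
}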