From cd38b3ec5478db0f2ef72eaab2d0c8579a5e98c7 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 2 Oct 2021 11:16:18 -0700 Subject: [PATCH] Revert "freedreno: Move the batch cache to the context." This reverts commit b2349a46715e870d7f38ce5f3854bef9718a835c. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5441 Part-of: --- .../drivers/freedreno/a5xx/fd5_blitter.c | 4 + .../drivers/freedreno/a6xx/fd6_blitter.c | 4 + .../drivers/freedreno/freedreno_batch.c | 204 ++++++------ .../drivers/freedreno/freedreno_batch.h | 44 ++- .../drivers/freedreno/freedreno_batch_cache.c | 291 ++++++++++++------ .../drivers/freedreno/freedreno_batch_cache.h | 36 ++- .../drivers/freedreno/freedreno_context.c | 6 +- .../drivers/freedreno/freedreno_context.h | 3 - .../drivers/freedreno/freedreno_draw.c | 12 + .../drivers/freedreno/freedreno_query_acc.c | 2 + .../drivers/freedreno/freedreno_query_hw.c | 2 +- .../drivers/freedreno/freedreno_resource.c | 138 ++++----- .../drivers/freedreno/freedreno_resource.h | 112 ++++--- .../drivers/freedreno/freedreno_screen.c | 3 + .../drivers/freedreno/freedreno_screen.h | 3 +- .../drivers/freedreno/freedreno_state.c | 13 +- 16 files changed, 555 insertions(+), 322 deletions(-) diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c b/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c index f67441a63d4..fe2bba5ef2b 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c @@ -446,9 +446,13 @@ fd5_blitter_blit(struct fd_context *ctx, batch = fd_bc_alloc_batch(ctx, true); + fd_screen_lock(ctx->screen); + fd_batch_resource_read(batch, src); fd_batch_resource_write(batch, dst); + fd_screen_unlock(ctx->screen); + DBG_BLIT(info, batch); fd_batch_update_queries(batch); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c index 84b88714089..c7d893c14f0 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c @@ -916,9 +916,13 @@ handle_rgba_blit(struct fd_context *ctx, batch = fd_bc_alloc_batch(ctx, true); + fd_screen_lock(ctx->screen); + fd_batch_resource_read(batch, src); fd_batch_resource_write(batch, dst); + fd_screen_unlock(ctx->screen); + ASSERTED bool ret = fd_batch_lock_submit(batch); assert(ret); diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c index dc1a850dc2b..6bc1e06969b 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.c +++ b/src/gallium/drivers/freedreno/freedreno_batch.c @@ -30,7 +30,6 @@ #include "util/u_string.h" #include "freedreno_batch.h" -#include "freedreno_batch_cache.h" #include "freedreno_context.h" #include "freedreno_fence.h" #include "freedreno_query_hw.h" @@ -139,8 +138,6 @@ fd_batch_create(struct fd_context *ctx, bool nondraw) batch->resources = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); - batch->dependents = - _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); batch_init(batch); @@ -189,8 +186,6 @@ cleanup_submit(struct fd_batch *batch) fd_submit_del(batch->submit); batch->submit = NULL; - - util_copy_framebuffer_state(&batch->framebuffer, NULL); } static void @@ -232,58 +227,46 @@ batch_fini(struct fd_batch *batch) u_trace_fini(&batch->trace); } -/* Flushes any batches that this batch depends on, recursively. 
*/ static void batch_flush_dependencies(struct fd_batch *batch) assert_dt { - set_foreach (batch->dependents, entry) { - struct fd_batch *dep = (void *)entry->key; + struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache; + struct fd_batch *dep; + + foreach_batch (dep, cache, batch->dependents_mask) { fd_batch_flush(dep); fd_batch_reference(&dep, NULL); } - _mesa_set_clear(batch->dependents, NULL); + + batch->dependents_mask = 0; } static void batch_reset_dependencies(struct fd_batch *batch) { - set_foreach (batch->dependents, entry) { - struct fd_batch *dep = (void *)entry->key; + struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache; + struct fd_batch *dep; + + foreach_batch (dep, cache, batch->dependents_mask) { fd_batch_reference(&dep, NULL); } - _mesa_set_clear(batch->dependents, NULL); + + batch->dependents_mask = 0; } static void batch_reset_resources(struct fd_batch *batch) { - struct fd_batch_cache *cache = &batch->ctx->batch_cache; + fd_screen_assert_locked(batch->ctx->screen); set_foreach (batch->resources, entry) { struct fd_resource *rsc = (struct fd_resource *)entry->key; - struct hash_entry *entry = - _mesa_hash_table_search_pre_hashed(cache->written_resources, rsc->hash, rsc); - if (entry) { - struct fd_batch *write_batch = entry->data; - assert(write_batch == batch); - - struct pipe_resource *table_ref = &rsc->b.b; - pipe_resource_reference(&table_ref, NULL); - - fd_batch_reference(&write_batch, NULL); - _mesa_hash_table_remove(cache->written_resources, entry); - } - - ASSERTED int32_t count = p_atomic_dec_return(&rsc->batch_references); - assert(count >= 0); - - struct pipe_resource *table_ref = &rsc->b.b; - pipe_resource_reference(&table_ref, NULL); + _mesa_set_remove(batch->resources, entry); + debug_assert(rsc->track->batch_mask & (1 << batch->idx)); + rsc->track->batch_mask &= ~(1 << batch->idx); + if (rsc->track->write_batch == batch) + fd_batch_reference_locked(&rsc->track->write_batch, NULL); } - /* Clear at the end so if the batch is reused we get a fully empty set - * rather than having any deleted keys. 
- */ - _mesa_set_clear(batch->resources, NULL); } static void @@ -292,7 +275,11 @@ batch_reset(struct fd_batch *batch) assert_dt DBG("%p", batch); batch_reset_dependencies(batch); + + fd_screen_lock(batch->ctx->screen); batch_reset_resources(batch); + fd_screen_unlock(batch->ctx->screen); + batch_fini(batch); batch_init(batch); } @@ -307,23 +294,30 @@ fd_batch_reset(struct fd_batch *batch) void __fd_batch_destroy(struct fd_batch *batch) { + struct fd_context *ctx = batch->ctx; + DBG("%p", batch); - fd_bc_free_key(batch); + fd_screen_assert_locked(batch->ctx->screen); + + fd_bc_invalidate_batch(batch, true); batch_reset_resources(batch); - assert(batch->resources->entries == 0); + debug_assert(batch->resources->entries == 0); _mesa_set_destroy(batch->resources, NULL); + fd_screen_unlock(ctx->screen); batch_reset_dependencies(batch); - assert(batch->dependents->entries == 0); - _mesa_set_destroy(batch->dependents, NULL); + debug_assert(batch->dependents_mask == 0); + util_copy_framebuffer_state(&batch->framebuffer, NULL); batch_fini(batch); simple_mtx_destroy(&batch->submit_lock); + free(batch->key); free(batch); + fd_screen_lock(ctx->screen); } void @@ -359,12 +353,20 @@ batch_flush(struct fd_batch *batch) assert_dt batch_flush_dependencies(batch); + fd_screen_lock(batch->ctx->screen); batch_reset_resources(batch); - fd_bc_free_key(batch); + /* NOTE: remove=false removes the batch from the hashtable, so future + * lookups won't cache-hit a flushed batch, but leaves the weak reference + * to the batch to avoid having multiple batches with same batch->idx, as + * that causes all sorts of hilarity. + */ + fd_bc_invalidate_batch(batch, false); batch->flushed = true; if (batch == batch->ctx->batch) - fd_batch_reference(&batch->ctx->batch, NULL); + fd_batch_reference_locked(&batch->ctx->batch, NULL); + + fd_screen_unlock(batch->ctx->screen); if (batch->fence) fd_fence_ref(&batch->ctx->last_fence, batch->fence); @@ -384,61 +386,78 @@ fd_batch_flush(struct fd_batch *batch) { struct fd_batch *tmp = NULL; - /* NOTE: Many callers pass in a ctx->batch or fd_bc_writer() batches without - * refcounting, which batch_flush will reset, so we need to hold a ref across - * the body of the flush. 
+ /* NOTE: we need to hold an extra ref across the body of flush, + * since the last ref to this batch could be dropped when cleaning + * up used_resources */ fd_batch_reference(&tmp, batch); batch_flush(tmp); fd_batch_reference(&tmp, NULL); } -static uint32_t ASSERTED -dependents_contains(const struct fd_batch *haystack, const struct fd_batch *needle) +/* find a batches dependents mask, including recursive dependencies: */ +static uint32_t +recursive_dependents_mask(struct fd_batch *batch) { - if (haystack == needle) - return true; + struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache; + struct fd_batch *dep; + uint32_t dependents_mask = batch->dependents_mask; - set_foreach (haystack->dependents, entry) { - if (dependents_contains(entry->key, needle)) - return true; - } + foreach_batch (dep, cache, batch->dependents_mask) + dependents_mask |= recursive_dependents_mask(dep); - return false; + return dependents_mask; } void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep) { - if (_mesa_set_search(batch->dependents, dep)) + fd_screen_assert_locked(batch->ctx->screen); + + if (batch->dependents_mask & (1 << dep->idx)) return; /* a loop should not be possible */ - assert(!dependents_contains(dep, batch)); + debug_assert(!((1 << batch->idx) & recursive_dependents_mask(dep))); - struct fd_batch *table_ref = NULL; - fd_batch_reference(&table_ref, dep); - _mesa_set_add(batch->dependents, dep); + struct fd_batch *other = NULL; + fd_batch_reference_locked(&other, dep); + batch->dependents_mask |= (1 << dep->idx); DBG("%p: added dependency on %p", batch, dep); } +static void +flush_write_batch(struct fd_resource *rsc) assert_dt +{ + struct fd_batch *b = NULL; + fd_batch_reference_locked(&b, rsc->track->write_batch); + + fd_screen_unlock(b->ctx->screen); + fd_batch_flush(b); + fd_screen_lock(b->ctx->screen); + + fd_batch_reference_locked(&b, NULL); +} + static void fd_batch_add_resource(struct fd_batch *batch, struct fd_resource *rsc) { - bool found = false; - _mesa_set_search_or_add_pre_hashed(batch->resources, rsc->hash, rsc, &found); - if (!found) { - struct pipe_resource *table_ref = NULL; - pipe_resource_reference(&table_ref, &rsc->b.b); - p_atomic_inc(&rsc->batch_references); + + if (likely(fd_batch_references_resource(batch, rsc))) { + debug_assert(_mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc)); + return; } + + debug_assert(!_mesa_set_search(batch->resources, rsc)); + + _mesa_set_add_pre_hashed(batch->resources, rsc->hash, rsc); + rsc->track->batch_mask |= (1 << batch->idx); } void fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc) { - struct fd_context *ctx = batch->ctx; - struct fd_batch_cache *cache = &ctx->batch_cache; + fd_screen_assert_locked(batch->ctx->screen); DBG("%p: write %p", batch, rsc); @@ -447,8 +466,7 @@ fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc) */ rsc->valid = true; - struct fd_batch *prev_writer = fd_bc_writer(ctx, rsc); - if (prev_writer == batch) + if (rsc->track->write_batch == batch) return; fd_batch_write_prep(batch, rsc); @@ -456,21 +474,32 @@ fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc) if (rsc->stencil) fd_batch_resource_write(batch, rsc->stencil); - /* Flush any other batches accessing our resource. Similar to - * fd_bc_flush_readers(). + /* note, invalidate write batch, to avoid further writes to rsc + * resulting in a write-after-read hazard. 
*/ - foreach_batch (reader, cache) { - if (reader == batch || !fd_batch_references(reader, rsc)) - continue; - fd_batch_flush(reader); - } + /* if we are pending read or write by any other batch: */ + if (unlikely(rsc->track->batch_mask & ~(1 << batch->idx))) { + struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache; + struct fd_batch *dep; - struct fd_batch *table_ref = NULL; - fd_batch_reference(&table_ref, batch); - struct pipe_resource *table_rsc_ref = NULL; - pipe_resource_reference(&table_rsc_ref, &rsc->b.b); - _mesa_hash_table_insert_pre_hashed(cache->written_resources, rsc->hash, rsc, - batch); + if (rsc->track->write_batch) + flush_write_batch(rsc); + + foreach_batch (dep, cache, rsc->track->batch_mask) { + struct fd_batch *b = NULL; + if (dep == batch) + continue; + /* note that batch_add_dep could flush and unref dep, so + * we need to hold a reference to keep it live for the + * fd_bc_invalidate_batch() + */ + fd_batch_reference(&b, dep); + fd_batch_add_dep(batch, b); + fd_bc_invalidate_batch(b, false); + fd_batch_reference_locked(&b, NULL); + } + } + fd_batch_reference_locked(&rsc->track->write_batch, batch); fd_batch_add_resource(batch, rsc); } @@ -478,16 +507,19 @@ fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc) void fd_batch_resource_read_slowpath(struct fd_batch *batch, struct fd_resource *rsc) { + fd_screen_assert_locked(batch->ctx->screen); + if (rsc->stencil) fd_batch_resource_read(batch, rsc->stencil); DBG("%p: read %p", batch, rsc); - struct fd_context *ctx = batch->ctx; - - struct fd_batch *writer = fd_bc_writer(ctx, rsc); - if (writer && writer != batch) - fd_batch_flush(writer); + /* If reading a resource pending a write, go ahead and flush the + * writer. This avoids situations where we end up having to + * flush the current batch in _resource_used() + */ + if (unlikely(rsc->track->write_batch && rsc->track->write_batch != batch)) + flush_write_batch(rsc); fd_batch_add_resource(batch, rsc); } diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h index c6edab46ef1..7b554c3cceb 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.h +++ b/src/gallium/drivers/freedreno/freedreno_batch.h @@ -28,7 +28,6 @@ #define FREEDRENO_BATCH_H_ #include "util/list.h" -#include "util/set.h" #include "util/simple_mtx.h" #include "util/u_inlines.h" #include "util/u_queue.h" @@ -245,9 +244,8 @@ struct fd_batch { uint32_t query_tile_stride; /*@}*/ - /* Set of references to resources used by currently-unsubmitted batch (read - * or write). Note that this set may not include all BOs referenced by the - * batch due to fd_bc_resource_invalidate(). + /* Set of resources used by currently-unsubmitted batch (read or + * write).. does not hold a reference to the resource. */ struct set *resources; @@ -256,7 +254,7 @@ struct fd_batch { uint32_t hash; /** set of dependent batches.. holds refs to dependent batches: */ - struct set *dependents; + uint32_t dependents_mask; /* Buffer for tessellation engine input */ @@ -291,11 +289,30 @@ struct fd_batch_key *fd_batch_key_clone(void *mem_ctx, void __fd_batch_describe(char *buf, const struct fd_batch *batch) assert_dt; void __fd_batch_destroy(struct fd_batch *batch); +/* + * NOTE the rule is, you need to hold the screen->lock when destroying + * a batch.. so either use fd_batch_reference() (which grabs the lock + * for you) if you don't hold the lock, or fd_batch_reference_locked() + * if you do hold the lock. 
+ * + * WARNING the _locked() version can briefly drop the lock. Without + * recursive mutexes, I'm not sure there is much else we can do (since + * __fd_batch_destroy() needs to unref resources) + * + * WARNING you must acquire the screen->lock and use the _locked() + * version in case that the batch being ref'd can disappear under + * you. + */ + static inline void -fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch) +fd_batch_reference_locked(struct fd_batch **ptr, struct fd_batch *batch) { struct fd_batch *old_batch = *ptr; + /* only need lock if a reference is dropped: */ + if (old_batch) + fd_screen_assert_locked(old_batch->ctx->screen); + if (pipe_reference_described( &(*ptr)->reference, &batch->reference, (debug_reference_descriptor)__fd_batch_describe)) @@ -304,6 +321,21 @@ fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch) *ptr = batch; } +static inline void +fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch) +{ + struct fd_batch *old_batch = *ptr; + struct fd_context *ctx = old_batch ? old_batch->ctx : NULL; + + if (ctx) + fd_screen_lock(ctx->screen); + + fd_batch_reference_locked(ptr, batch); + + if (ctx) + fd_screen_unlock(ctx->screen); +} + static inline void fd_batch_unlock_submit(struct fd_batch *batch) { diff --git a/src/gallium/drivers/freedreno/freedreno_batch_cache.c b/src/gallium/drivers/freedreno/freedreno_batch_cache.c index d0ef1147eb6..5c31475c223 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch_cache.c +++ b/src/gallium/drivers/freedreno/freedreno_batch_cache.c @@ -129,30 +129,39 @@ fd_batch_key_clone(void *mem_ctx, const struct fd_batch_key *key) } void -fd_bc_init(struct fd_context *ctx) +fd_bc_init(struct fd_batch_cache *cache) { - struct fd_batch_cache *cache = &ctx->batch_cache; - cache->ht = _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals); - cache->written_resources = _mesa_pointer_hash_table_create(NULL); } void -fd_bc_fini(struct fd_context *ctx) assert_dt +fd_bc_fini(struct fd_batch_cache *cache) { - struct fd_batch_cache *cache = &ctx->batch_cache; - - fd_bc_flush(ctx, false); _mesa_hash_table_destroy(cache->ht, NULL); - _mesa_hash_table_destroy(cache->written_resources, NULL); } /* Flushes all batches in the batch cache. Used at glFlush() and similar times. */ void fd_bc_flush(struct fd_context *ctx, bool deferred) assert_dt { - struct fd_batch_cache *cache = &ctx->batch_cache; + struct fd_batch_cache *cache = &ctx->screen->batch_cache; + + /* fd_batch_flush() (and fd_batch_add_dep() which calls it indirectly) + * can cause batches to be unref'd and freed under our feet, so grab + * a reference to all the batches we need up-front. 
+ */ + struct fd_batch *batches[ARRAY_SIZE(cache->batches)] = {0}; + struct fd_batch *batch; + unsigned n = 0; + + fd_screen_lock(ctx->screen); + + foreach_batch (batch, cache, cache->batch_mask) { + if (batch->ctx == ctx) { + fd_batch_reference_locked(&batches[n++], batch); + } + } /* deferred flush doesn't actually flush, but it marks every other * batch associated with the context as dependent on the current @@ -162,170 +171,245 @@ fd_bc_flush(struct fd_context *ctx, bool deferred) assert_dt if (deferred) { struct fd_batch *current_batch = fd_context_batch(ctx); - foreach_batch (batch, cache) { - if (batch != current_batch) - fd_batch_add_dep(current_batch, batch); + for (unsigned i = 0; i < n; i++) { + if (batches[i] && (batches[i]->ctx == ctx) && + (batches[i] != current_batch)) { + fd_batch_add_dep(current_batch, batches[i]); + } } - fd_batch_reference(¤t_batch, NULL); + + fd_batch_reference_locked(¤t_batch, NULL); + + fd_screen_unlock(ctx->screen); } else { - foreach_batch (batch, cache) { - fd_batch_flush(batch); + fd_screen_unlock(ctx->screen); + + for (unsigned i = 0; i < n; i++) { + fd_batch_flush(batches[i]); } } + + for (unsigned i = 0; i < n; i++) { + fd_batch_reference(&batches[i], NULL); + } } /** - * Flushes the batch (if any) writing this resource. + * Flushes the batch (if any) writing this resource. Must not hold the screen + * lock. */ void fd_bc_flush_writer(struct fd_context *ctx, struct fd_resource *rsc) assert_dt { - struct fd_batch_cache *cache = &ctx->batch_cache; - struct hash_entry *entry = - _mesa_hash_table_search_pre_hashed(cache->written_resources, rsc->hash, rsc); - if (entry) { - fd_batch_flush(entry->data); + fd_screen_lock(ctx->screen); + struct fd_batch *write_batch = NULL; + fd_batch_reference_locked(&write_batch, rsc->track->write_batch); + fd_screen_unlock(ctx->screen); + + if (write_batch) { + fd_batch_flush(write_batch); + fd_batch_reference(&write_batch, NULL); } } /** - * Flushes any batches reading this resource. + * Flushes any batches reading this resource. Must not hold the screen lock. */ void fd_bc_flush_readers(struct fd_context *ctx, struct fd_resource *rsc) assert_dt { - foreach_batch (batch, &ctx->batch_cache) { - if (fd_batch_references(batch, rsc)) - fd_batch_flush(batch); - } -} + struct fd_batch *batch, *batches[32] = {}; + uint32_t batch_count = 0; -/** - * Flushes any batches accessing this resource as part of the gmem key. - * - * Used in resource shadowing. - */ -void -fd_bc_flush_gmem_users(struct fd_context *ctx, struct fd_resource *rsc) -{ - foreach_batch (batch, &ctx->batch_cache) { - if (!batch->key) - continue; - for (int i = 0; i < batch->key->num_surfs; i++) { - if (batch->key->surf[i].texture == &rsc->b.b) { - struct fd_batch *tmp = NULL; - fd_batch_reference(&tmp, batch); - fd_batch_flush(batch); - fd_batch_reference(&tmp, NULL); - break; - } - } + /* This is a bit awkward, probably a fd_batch_flush_locked() + * would make things simpler.. but we need to hold the lock + * to iterate the batches which reference this resource. So + * we must first grab references under a lock, then flush. + */ + fd_screen_lock(ctx->screen); + foreach_batch (batch, &ctx->screen->batch_cache, rsc->track->batch_mask) + fd_batch_reference_locked(&batches[batch_count++], batch); + fd_screen_unlock(ctx->screen); + + for (int i = 0; i < batch_count; i++) { + fd_batch_flush(batches[i]); + fd_batch_reference(&batches[i], NULL); } } void fd_bc_dump(struct fd_context *ctx, const char *fmt, ...) 
{ - struct fd_batch_cache *cache = &ctx->batch_cache; + struct fd_batch_cache *cache = &ctx->screen->batch_cache; if (!FD_DBG(MSGS)) return; + fd_screen_lock(ctx->screen); + va_list ap; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); - foreach_batch (batch, cache) { - printf(" %p<%u>%s\n", batch, batch->seqno, - batch->needs_flush ? ", NEEDS FLUSH" : ""); + for (int i = 0; i < ARRAY_SIZE(cache->batches); i++) { + struct fd_batch *batch = cache->batches[i]; + if (batch) { + printf(" %p<%u>%s\n", batch, batch->seqno, + batch->needs_flush ? ", NEEDS FLUSH" : ""); + } } printf("----\n"); + + fd_screen_unlock(ctx->screen); } -/* Removes a batch from the batch cache (typically after a flush) */ +/** + * Note that when batch is flushed, it needs to remain in the cache so + * that fd_bc_invalidate_resource() can work.. otherwise we can have + * the case where a rsc is destroyed while a batch still has a dangling + * reference to it. + * + * Note that the cmdstream (or, after the SUBMIT ioctl, the kernel) + * would have a reference to the underlying bo, so it is ok for the + * rsc to be destroyed before the batch. + */ void -fd_bc_free_key(struct fd_batch *batch) +fd_bc_invalidate_batch(struct fd_batch *batch, bool remove) { if (!batch) return; - struct fd_context *ctx = batch->ctx; - struct fd_batch_cache *cache = &ctx->batch_cache; + struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache; + struct fd_batch_key *key = batch->key; - if (batch->key) { - _mesa_hash_table_remove_key(cache->ht, batch->key); - free((void *)batch->key); - batch->key = NULL; + fd_screen_assert_locked(batch->ctx->screen); + + if (remove) { + cache->batches[batch->idx] = NULL; + cache->batch_mask &= ~(1 << batch->idx); } -} -/* Called when the resource is has had its underlying storage replaced, so - * previous batch references to it are no longer relevant for flushing access to - * that storage. 
- */ -void -fd_bc_invalidate_resource(struct fd_context *ctx, struct fd_resource *rsc) -{ - foreach_batch (batch, &ctx->batch_cache) { - struct set_entry *entry = _mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc); - if (entry) { - struct pipe_resource *table_ref = &rsc->b.b; - pipe_resource_reference(&table_ref, NULL); + if (!key) + return; - ASSERTED int32_t count = p_atomic_dec_return(&rsc->batch_references); - assert(count >= 0); - - _mesa_set_remove(batch->resources, entry); - } + DBG("%p: key=%p", batch, batch->key); + for (unsigned idx = 0; idx < key->num_surfs; idx++) { + struct fd_resource *rsc = fd_resource(key->surf[idx].texture); + rsc->track->bc_batch_mask &= ~(1 << batch->idx); } struct hash_entry *entry = - _mesa_hash_table_search(ctx->batch_cache.written_resources, rsc); - if (entry) { - struct fd_batch *batch = entry->data; - struct pipe_resource *table_ref = &rsc->b.b; - pipe_resource_reference(&table_ref, NULL); - _mesa_hash_table_remove(ctx->batch_cache.written_resources, entry); - fd_batch_reference(&batch, NULL); + _mesa_hash_table_search_pre_hashed(cache->ht, batch->hash, key); + _mesa_hash_table_remove(cache->ht, entry); +} + +void +fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy) +{ + struct fd_screen *screen = fd_screen(rsc->b.b.screen); + struct fd_batch *batch; + + fd_screen_lock(screen); + + if (destroy) { + foreach_batch (batch, &screen->batch_cache, rsc->track->batch_mask) { + struct set_entry *entry = _mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc); + _mesa_set_remove(batch->resources, entry); + } + rsc->track->batch_mask = 0; + + fd_batch_reference_locked(&rsc->track->write_batch, NULL); } + + foreach_batch (batch, &screen->batch_cache, rsc->track->bc_batch_mask) + fd_bc_invalidate_batch(batch, false); + + rsc->track->bc_batch_mask = 0; + + fd_screen_unlock(screen); } static struct fd_batch * -alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx, bool nondraw) assert_dt +alloc_batch_locked(struct fd_batch_cache *cache, struct fd_context *ctx, + bool nondraw) assert_dt { struct fd_batch *batch; + uint32_t idx; - if (cache->ht->entries >= 32) { + fd_screen_assert_locked(ctx->screen); + + while ((idx = ffs(~cache->batch_mask)) == 0) { +#if 0 + for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) { + batch = cache->batches[i]; + debug_printf("%d: needs_flush=%d, depends:", batch->idx, batch->needs_flush); + set_foreach (batch->dependencies, entry) { + struct fd_batch *dep = (struct fd_batch *)entry->key; + debug_printf(" %d", dep->idx); + } + debug_printf("\n"); + } +#endif /* TODO: is LRU the better policy? Or perhaps the batch that * depends on the fewest other batches? */ struct fd_batch *flush_batch = NULL; - foreach_batch (batch, cache) { - if (!flush_batch || (batch->seqno < flush_batch->seqno)) - fd_batch_reference(&flush_batch, batch); + for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) { + if (!flush_batch || (cache->batches[i]->seqno < flush_batch->seqno)) + fd_batch_reference_locked(&flush_batch, cache->batches[i]); } + /* we can drop lock temporarily here, since we hold a ref, + * flush_batch won't disappear under us. + */ + fd_screen_unlock(ctx->screen); DBG("%p: too many batches! 
flush forced!", flush_batch); fd_batch_flush(flush_batch); + fd_screen_lock(ctx->screen); - fd_batch_reference(&flush_batch, NULL); + /* While the resources get cleaned up automatically, the flush_batch + * doesn't get removed from the dependencies of other batches, so + * it won't be unref'd and will remain in the table. + * + * TODO maybe keep a bitmask of batches that depend on me, to make + * this easier: + */ + for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) { + struct fd_batch *other = cache->batches[i]; + if (!other) + continue; + if (other->dependents_mask & (1 << flush_batch->idx)) { + other->dependents_mask &= ~(1 << flush_batch->idx); + struct fd_batch *ref = flush_batch; + fd_batch_reference_locked(&ref, NULL); + } + } + + fd_batch_reference_locked(&flush_batch, NULL); } + idx--; /* bit zero returns 1 for ffs() */ + batch = fd_batch_create(ctx, nondraw); if (!batch) return NULL; batch->seqno = cache->cnt++; + batch->idx = idx; + cache->batch_mask |= (1 << idx); + + debug_assert(cache->batches[idx] == NULL); + cache->batches[idx] = batch; return batch; } struct fd_batch * -fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw) assert_dt +fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw) { - struct fd_batch_cache *cache = &ctx->batch_cache; + struct fd_batch_cache *cache = &ctx->screen->batch_cache; struct fd_batch *batch; /* For normal draw batches, pctx->set_framebuffer_state() handles @@ -335,7 +419,9 @@ fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw) assert_dt if (nondraw) fd_context_switch_from(ctx); - batch = alloc_batch(cache, ctx, nondraw); + fd_screen_lock(ctx->screen); + batch = alloc_batch_locked(cache, ctx, nondraw); + fd_screen_unlock(ctx->screen); if (batch && nondraw) fd_context_switch_to(ctx, batch); @@ -346,7 +432,7 @@ fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw) assert_dt static struct fd_batch * batch_from_key(struct fd_context *ctx, struct fd_batch_key *key) assert_dt { - struct fd_batch_cache *cache = &ctx->batch_cache; + struct fd_batch_cache *cache = &ctx->screen->batch_cache; struct fd_batch *batch = NULL; uint32_t hash = fd_batch_key_hash(key); struct hash_entry *entry = @@ -354,12 +440,12 @@ batch_from_key(struct fd_context *ctx, struct fd_batch_key *key) assert_dt if (entry) { free(key); - fd_batch_reference(&batch, (struct fd_batch *)entry->data); + fd_batch_reference_locked(&batch, (struct fd_batch *)entry->data); assert(!batch->flushed); return batch; } - batch = alloc_batch(cache, ctx, false); + batch = alloc_batch_locked(cache, ctx, false); #ifdef DEBUG DBG("%p: hash=0x%08x, %ux%u, %u layers, %u samples", batch, hash, key->width, key->height, key->layers, key->samples); @@ -387,6 +473,11 @@ batch_from_key(struct fd_context *ctx, struct fd_batch_key *key) assert_dt batch->key = key; batch->hash = hash; + for (unsigned idx = 0; idx < key->num_surfs; idx++) { + struct fd_resource *rsc = fd_resource(key->surf[idx].texture); + rsc->track->bc_batch_mask = (1 << batch->idx); + } + return batch; } @@ -423,5 +514,9 @@ fd_batch_from_fb(struct fd_context *ctx, key->num_surfs = idx; - return batch_from_key(ctx, key); + fd_screen_lock(ctx->screen); + struct fd_batch *batch = batch_from_key(ctx, key); + fd_screen_unlock(ctx->screen); + + return batch; } diff --git a/src/gallium/drivers/freedreno/freedreno_batch_cache.h b/src/gallium/drivers/freedreno/freedreno_batch_cache.h index cce2b7aaf9c..4b2737bac80 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch_cache.h +++ 
b/src/gallium/drivers/freedreno/freedreno_batch_cache.h @@ -28,7 +28,6 @@ #define FREEDRENO_BATCH_CACHE_H_ #include "pipe/p_state.h" -#include "util/hash_table.h" #include "freedreno_util.h" @@ -40,32 +39,41 @@ struct fd_screen; struct hash_table; struct fd_batch_cache { - /* Mapping from struct key (basically framebuffer state) to a batch of - * rendering for rendering to that target. - */ struct hash_table *ht; unsigned cnt; - /* Mapping from struct pipe_resource to the batch writing it. Keeps a reference. */ - struct hash_table *written_resources; + /* set of active batches.. there is an upper limit on the number of + * in-flight batches, for two reasons: + * 1) to avoid big spikes in number of batches in edge cases, such as + * game startup (ie, lots of texture uploads, but no usages yet of + * the textures), etc. + * 2) so we can use a simple bitmask in fd_resource to track which + * batches have reference to the resource + */ + struct fd_batch *batches[32]; + uint32_t batch_mask; }; -#define foreach_batch(batch, cache) \ - hash_table_foreach ((cache)->ht, _batch_entry) \ - for (struct fd_batch *batch = _batch_entry->data; batch != NULL; batch = NULL) +/* note: if batches get unref'd in the body of the loop, they are removed + * from the various masks.. but since we copy the mask at the beginning of + * the loop into _m, we need the &= at the end of the loop to make sure + * we don't have stale bits in _m + */ +#define foreach_batch(batch, cache, mask) \ + for (uint32_t _m = (mask); \ + _m && ((batch) = (cache)->batches[u_bit_scan(&_m)]); _m &= (mask)) -void fd_bc_init(struct fd_context *ctx); -void fd_bc_fini(struct fd_context *ctx); +void fd_bc_init(struct fd_batch_cache *cache); +void fd_bc_fini(struct fd_batch_cache *cache); void fd_bc_flush(struct fd_context *ctx, bool deferred) assert_dt; void fd_bc_flush_writer(struct fd_context *ctx, struct fd_resource *rsc) assert_dt; void fd_bc_flush_readers(struct fd_context *ctx, struct fd_resource *rsc) assert_dt; -void fd_bc_flush_gmem_users(struct fd_context *ctx, struct fd_resource *rsc) assert_dt; void fd_bc_dump(struct fd_context *ctx, const char *fmt, ...) _util_printf_format(2, 3); -void fd_bc_free_key(struct fd_batch *batch); -void fd_bc_invalidate_resource(struct fd_context *ctx, struct fd_resource *rsc); +void fd_bc_invalidate_batch(struct fd_batch *batch, bool destroy); +void fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy); struct fd_batch *fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw) assert_dt; diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index 7587b35a507..43ab14f8a8c 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -353,7 +353,9 @@ fd_context_destroy(struct pipe_context *pctx) util_copy_framebuffer_state(&ctx->framebuffer, NULL); fd_batch_reference(&ctx->batch, NULL); /* unref current batch */ - fd_bc_fini(ctx); + + /* Make sure nothing in the batch cache references our context any more. 
*/ + fd_bc_flush(ctx, false); fd_prog_fini(pctx); @@ -640,8 +642,6 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen, slab_create_child(&ctx->transfer_pool, &screen->transfer_pool); slab_create_child(&ctx->transfer_pool_unsync, &screen->transfer_pool); - fd_bc_init(ctx); - fd_draw_init(pctx); fd_resource_context_init(pctx); fd_query_context_init(pctx); diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 67bdba50718..9da86bf27d2 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -37,7 +37,6 @@ #include "util/perf/u_trace.h" #include "freedreno_autotune.h" -#include "freedreno_batch_cache.h" #include "freedreno_gmem.h" #include "freedreno_perfetto.h" #include "freedreno_screen.h" @@ -213,8 +212,6 @@ struct fd_context { */ simple_mtx_t gmem_lock; - struct fd_batch_cache batch_cache; - struct fd_device *dev; struct fd_screen *screen; struct fd_pipe *pipe; diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index e5d430182be..baa5f2e5530 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -197,6 +197,8 @@ batch_draw_tracking(struct fd_batch *batch, const struct pipe_draw_info *info, * Figure out the buffers/features we need: */ + fd_screen_lock(ctx->screen); + if (ctx->dirty & FD_DIRTY_RESOURCE) batch_draw_tracking_for_dirty_bits(batch); @@ -218,6 +220,8 @@ batch_draw_tracking(struct fd_batch *batch, const struct pipe_draw_info *info, list_for_each_entry (struct fd_acc_query, aq, &ctx->acc_active_queries, node) resource_written(batch, aq->prsc); + + fd_screen_unlock(ctx->screen); } static void @@ -395,6 +399,8 @@ batch_clear_tracking(struct fd_batch *batch, unsigned buffers) assert_dt batch->resolve |= buffers; + fd_screen_lock(ctx->screen); + if (buffers & PIPE_CLEAR_COLOR) for (unsigned i = 0; i < pfb->nr_cbufs; i++) if (buffers & (PIPE_CLEAR_COLOR0 << i)) @@ -409,6 +415,8 @@ batch_clear_tracking(struct fd_batch *batch, unsigned buffers) assert_dt list_for_each_entry (struct fd_acc_query, aq, &ctx->acc_active_queries, node) resource_written(batch, aq->prsc); + + fd_screen_unlock(ctx->screen); } static void @@ -511,6 +519,8 @@ fd_launch_grid(struct pipe_context *pctx, fd_batch_reference(&ctx->batch, batch); fd_context_all_dirty(ctx); + fd_screen_lock(ctx->screen); + /* Mark SSBOs */ u_foreach_bit (i, so->enabled_mask & so->writable_mask) resource_written(batch, so->sb[i].buffer); @@ -543,6 +553,8 @@ fd_launch_grid(struct pipe_context *pctx, if (info->indirect) resource_read(batch, info->indirect); + fd_screen_unlock(ctx->screen); + DBG("%p: work_dim=%u, block=%ux%ux%u, grid=%ux%ux%u", batch, info->work_dim, info->block[0], info->block[1], info->block[2], diff --git a/src/gallium/drivers/freedreno/freedreno_query_acc.c b/src/gallium/drivers/freedreno/freedreno_query_acc.c index 518f792425a..74da4ce8ab2 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_acc.c +++ b/src/gallium/drivers/freedreno/freedreno_query_acc.c @@ -89,7 +89,9 @@ fd_acc_query_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt fd_batch_needs_flush(aq->batch); p->resume(aq, aq->batch); + fd_screen_lock(batch->ctx->screen); fd_batch_resource_write(batch, fd_resource(aq->prsc)); + fd_screen_unlock(batch->ctx->screen); } static void diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c 
b/src/gallium/drivers/freedreno/freedreno_query_hw.c index 5a2ed47a837..560cf1bba39 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_hw.c +++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c @@ -340,7 +340,7 @@ fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles) uint32_t tile_stride = batch->next_sample_offset; if (tile_stride > 0) - fd_resource_resize(batch->ctx, batch->query_buf, tile_stride * num_tiles); + fd_resource_resize(batch->query_buf, tile_stride * num_tiles); batch->query_tile_stride = tile_stride; diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index f09d194ea2f..add830d9c12 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -191,7 +191,7 @@ __fd_resource_wait(struct fd_context *ctx, struct fd_resource *rsc, unsigned op, } static void -realloc_bo(struct fd_context *ctx, struct fd_resource *rsc, uint32_t size) +realloc_bo(struct fd_resource *rsc, uint32_t size) { struct pipe_resource *prsc = &rsc->b.b; struct fd_screen *screen = fd_screen(rsc->b.b.screen); @@ -224,8 +224,7 @@ realloc_bo(struct fd_context *ctx, struct fd_resource *rsc, uint32_t size) } util_range_set_empty(&rsc->valid_buffer_range); - if (ctx) - fd_bc_invalidate_resource(ctx, rsc); + fd_bc_invalidate_resource(rsc, true); } static void @@ -269,20 +268,20 @@ fd_replace_buffer_storage(struct pipe_context *pctx, struct pipe_resource *pdst, */ assert(pdst->target == PIPE_BUFFER); assert(psrc->target == PIPE_BUFFER); -#ifndef NDEBUG - foreach_batch (batch, &ctx->batch_cache) { - assert(!fd_batch_references(batch, src)); - } -#endif + assert(dst->track->bc_batch_mask == 0); + assert(src->track->bc_batch_mask == 0); + assert(src->track->batch_mask == 0); + assert(src->track->write_batch == NULL); assert(memcmp(&dst->layout, &src->layout, sizeof(dst->layout)) == 0); - /* get rid of any references that batch-cache might have to us. + /* get rid of any references that batch-cache might have to us (which + * should empty/destroy rsc->batches hashset) * * Note that we aren't actually destroying dst, but we are replacing * it's storage so we want to go thru the same motions of decoupling * it's batch connections. */ - fd_bc_invalidate_resource(ctx, dst); + fd_bc_invalidate_resource(dst, true); rebind_resource(dst); util_idalloc_mt_free(&ctx->screen->buffer_ids, delete_buffer_id); @@ -292,6 +291,7 @@ fd_replace_buffer_storage(struct pipe_context *pctx, struct pipe_resource *pdst, fd_bo_del(dst->bo); dst->bo = fd_bo_ref(src->bo); + fd_resource_tracking_reference(&dst->track, src->track); src->is_replacement = true; dst->seqno = p_atomic_inc_return(&ctx->screen->rsc_seqno); @@ -319,11 +319,7 @@ fd_resource_busy(struct pipe_screen *pscreen, struct pipe_resource *prsc, { struct fd_resource *rsc = fd_resource(prsc); - /* Note that while TC *can* ask us about busy for MAP_READ to promote to an - * unsync map, it's not a case we have ever seen a need to optimize for, and - * being conservative in resource_busy() is safe. 
- */ - if (p_atomic_read(&rsc->batch_references) != 0) + if (pending(rsc, !!(usage & PIPE_MAP_WRITE))) return true; if (resource_busy(rsc, translate_usage(usage))) @@ -362,6 +358,8 @@ fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc, { struct pipe_context *pctx = &ctx->base; struct pipe_resource *prsc = &rsc->b.b; + struct fd_screen *screen = fd_screen(pctx->screen); + struct fd_batch *batch; bool fallback = false; if (prsc->next) @@ -382,7 +380,9 @@ fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc, * Note that being in the gmem key doesn't necessarily mean the * batch was considered a writer! */ - fd_bc_flush_gmem_users(ctx, rsc); + foreach_batch (batch, &screen->batch_cache, rsc->track->bc_batch_mask) { + fd_batch_flush(batch); + } /* TODO: somehow munge dimensions and format to copy unsupported * render target format to something that is supported? @@ -415,11 +415,14 @@ fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc, assert(!ctx->in_shadow); ctx->in_shadow = true; - /* Signal that any HW state pointing at this resource needs to be - * re-emitted. + /* get rid of any references that batch-cache might have to us (which + * should empty/destroy rsc->batches hashset) */ + fd_bc_invalidate_resource(rsc, false); rebind_resource(rsc); + fd_screen_lock(ctx->screen); + /* Swap the backing bo's, so shadow becomes the old buffer, * blit from shadow to new buffer. From here on out, we * cannot fail. @@ -430,8 +433,8 @@ fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc, */ struct fd_resource *shadow = fd_resource(pshadow); - DBG("shadow: %p (%d) -> %p (%d)", rsc, rsc->b.b.reference.count, - shadow, shadow->b.b.reference.count); + DBG("shadow: %p (%d, %p) -> %p (%d, %p)", rsc, rsc->b.b.reference.count, + rsc->track, shadow, shadow->b.b.reference.count, shadow->track); swap(rsc->bo, shadow->bo); swap(rsc->valid, shadow->valid); @@ -444,27 +447,19 @@ fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc, swap(rsc->layout, shadow->layout); rsc->seqno = p_atomic_inc_return(&ctx->screen->rsc_seqno); - /* At this point, the newly created shadow buffer is not referenced by any - * batches, but the existing rsc may be as a reader (we flushed writers - * above). We want to remove the old reader references to rsc so that we - * don't cause unnecessary synchronization on the write we're about to emit, - * but we don't need to bother with transferring the references over to - * shadow because right after our blit (which is also doing a read so it only - * cares about the lack of a writer) shadow will be freed. + /* at this point, the newly created shadow buffer is not referenced + * by any batches, but the existing rsc (probably) is. 
We need to + * transfer those references over: */ - assert(!pending(ctx, shadow, true)); - foreach_batch (batch, &ctx->batch_cache) { + debug_assert(shadow->track->batch_mask == 0); + foreach_batch (batch, &ctx->screen->batch_cache, rsc->track->batch_mask) { struct set_entry *entry = _mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc); - if (entry) { - struct pipe_resource *table_ref = &rsc->b.b; - pipe_resource_reference(&table_ref, NULL); - - ASSERTED int32_t count = p_atomic_dec_return(&rsc->batch_references); - assert(count >= 0); - - _mesa_set_remove(batch->resources, entry); - } + _mesa_set_remove(batch->resources, entry); + _mesa_set_add_pre_hashed(batch->resources, shadow->hash, shadow); } + swap(rsc->track, shadow->track); + + fd_screen_unlock(ctx->screen); struct pipe_blit_info blit = {}; blit.dst.resource = prsc; @@ -731,14 +726,14 @@ fd_resource_transfer_unmap(struct pipe_context *pctx, } static void -invalidate_resource(struct fd_context *ctx, struct fd_resource *rsc, unsigned usage) assert_dt +invalidate_resource(struct fd_resource *rsc, unsigned usage) assert_dt { - bool needs_flush = pending(ctx, rsc, !!(usage & PIPE_MAP_WRITE)); + bool needs_flush = pending(rsc, !!(usage & PIPE_MAP_WRITE)); unsigned op = translate_usage(usage); if (needs_flush || resource_busy(rsc, op)) { rebind_resource(rsc); - realloc_bo(ctx, rsc, fd_bo_size(rsc->bo)); + realloc_bo(rsc, fd_bo_size(rsc->bo)); } else { util_range_set_empty(&rsc->valid_buffer_range); } @@ -830,10 +825,10 @@ resource_transfer_map(struct pipe_context *pctx, struct pipe_resource *prsc, } if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) { - invalidate_resource(ctx, rsc, usage); + invalidate_resource(rsc, usage); } else { unsigned op = translate_usage(usage); - bool needs_flush = pending(ctx, rsc, !!(usage & PIPE_MAP_WRITE)); + bool needs_flush = pending(rsc, !!(usage & PIPE_MAP_WRITE)); /* If the GPU is writing to the resource, or if it is reading from the * resource and we're trying to write to it, flush the renders. @@ -1004,13 +999,8 @@ fd_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *prsc) struct fd_screen *screen = fd_screen(prsc->screen); struct fd_resource *rsc = fd_resource(prsc); - /* Note that a destroyed resource may be present in an outstanding - * batch->resources (not as a batch_cache->written_resource, since those - * have references to resources). However, all that means is that we might - * miss an opportunity to reorder a batch by spuriously detecting a newly - * created resource as still in use and flush the old reader. - */ - + if (!rsc->is_replacement) + fd_bc_invalidate_resource(rsc, true); if (rsc->bo) fd_bo_del(rsc->bo); if (rsc->lrz) @@ -1025,6 +1015,7 @@ fd_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *prsc) util_range_destroy(&rsc->valid_buffer_range); simple_mtx_destroy(&rsc->lock); + fd_resource_tracking_reference(&rsc->track, NULL); FREE(rsc); } @@ -1061,7 +1052,7 @@ fd_resource_get_handle(struct pipe_screen *pscreen, struct pipe_context *pctx, /* special case to resize query buf after allocated.. 
*/ void -fd_resource_resize(struct fd_context *ctx, struct pipe_resource *prsc, uint32_t sz) +fd_resource_resize(struct pipe_resource *prsc, uint32_t sz) { struct fd_resource *rsc = fd_resource(prsc); @@ -1070,7 +1061,7 @@ fd_resource_resize(struct fd_context *ctx, struct pipe_resource *prsc, uint32_t debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER); prsc->width0 = sz; - realloc_bo(ctx, rsc, fd_screen(prsc->screen)->setup_slices(rsc)); + realloc_bo(rsc, fd_screen(prsc->screen)->setup_slices(rsc)); } static void @@ -1110,6 +1101,14 @@ alloc_resource_struct(struct pipe_screen *pscreen, util_range_init(&rsc->valid_buffer_range); simple_mtx_init(&rsc->lock, mtx_plain); + rsc->track = CALLOC_STRUCT(fd_resource_tracking); + if (!rsc->track) { + free(rsc); + return NULL; + } + + pipe_reference_init(&rsc->track->reference, 1); + threaded_resource_init(prsc); if (tmpl->target == PIPE_BUFFER) @@ -1333,7 +1332,7 @@ fd_resource_create_with_modifiers(struct pipe_screen *pscreen, return NULL; rsc = fd_resource(prsc); - realloc_bo(NULL, rsc, size); + realloc_bo(rsc, size); if (!rsc->bo) goto fail; @@ -1457,25 +1456,24 @@ fd_invalidate_resource(struct pipe_context *pctx, if (prsc->target == PIPE_BUFFER) { /* Handle the glInvalidateBufferData() case: */ - invalidate_resource(ctx, rsc, PIPE_MAP_READ | PIPE_MAP_WRITE); - } else { - struct fd_batch *batch = fd_bc_writer(ctx, rsc); - if (batch) { - /* Handle the glInvalidateFramebuffer() case, telling us that - * we can skip resolve. - */ - struct pipe_framebuffer_state *pfb = &batch->framebuffer; + invalidate_resource(rsc, PIPE_MAP_READ | PIPE_MAP_WRITE); + } else if (rsc->track->write_batch) { + /* Handle the glInvalidateFramebuffer() case, telling us that + * we can skip resolve. + */ - if (pfb->zsbuf && pfb->zsbuf->texture == prsc) { - batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL); - fd_context_dirty(ctx, FD_DIRTY_ZSA); - } + struct fd_batch *batch = rsc->track->write_batch; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; - for (unsigned i = 0; i < pfb->nr_cbufs; i++) { - if (pfb->cbufs[i] && pfb->cbufs[i]->texture == prsc) { - batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i); - fd_context_dirty(ctx, FD_DIRTY_FRAMEBUFFER); - } + if (pfb->zsbuf && pfb->zsbuf->texture == prsc) { + batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL); + fd_context_dirty(ctx, FD_DIRTY_ZSA); + } + + for (unsigned i = 0; i < pfb->nr_cbufs; i++) { + if (pfb->cbufs[i] && pfb->cbufs[i]->texture == prsc) { + batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i); + fd_context_dirty(ctx, FD_DIRTY_FRAMEBUFFER); } } } diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h index ba2825a742c..93057172746 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.h +++ b/src/gallium/drivers/freedreno/freedreno_resource.h @@ -28,7 +28,6 @@ #define FREEDRENO_RESOURCE_H_ #include "util/list.h" -#include "util/set.h" #include "util/simple_mtx.h" #include "util/u_dump.h" #include "util/u_range.h" @@ -56,6 +55,62 @@ enum fd_lrz_direction { FD_LRZ_GREATER, }; +/** + * State related to batch/resource tracking. + * + * With threaded_context we need to support replace_buffer_storage, in + * which case we can end up in transfer_map with tres->latest, but other + * pipe_context APIs using the original prsc pointer. This allows TC to + * not have to synchronize the front-end thread with the buffer storage + * replacement called on driver thread. But it complicates the batch/ + * resource tracking. 
+ * + * To handle this, we need to split the tracking out into it's own ref- + * counted structure, so as needed both "versions" of the resource can + * point to the same tracking. + * + * We could *almost* just push this down to fd_bo, except for a3xx/a4xx + * hw queries, where we don't know up-front the size to allocate for + * per-tile query results. + */ +struct fd_resource_tracking { + struct pipe_reference reference; + + /* bitmask of in-flight batches which reference this resource. Note + * that the batch doesn't hold reference to resources (but instead + * the fd_ringbuffer holds refs to the underlying fd_bo), but in case + * the resource is destroyed we need to clean up the batch's weak + * references to us. + */ + uint32_t batch_mask; + + /* reference to batch that writes this resource: */ + struct fd_batch *write_batch; + + /* Set of batches whose batch-cache key references this resource. + * We need to track this to know which batch-cache entries to + * invalidate if, for example, the resource is invalidated or + * shadowed. + */ + uint32_t bc_batch_mask; +}; + +void __fd_resource_tracking_destroy(struct fd_resource_tracking *track); + +static inline void +fd_resource_tracking_reference(struct fd_resource_tracking **ptr, + struct fd_resource_tracking *track) +{ + struct fd_resource_tracking *old_track = *ptr; + + if (pipe_reference(&(*ptr)->reference, &track->reference)) { + assert(!old_track->write_batch); + free(old_track); + } + + *ptr = track; +} + /** * A resource (any buffer/texture/image/etc) */ @@ -64,13 +119,6 @@ struct fd_resource { struct fd_bo *bo; /* use fd_resource_set_bo() to write */ enum pipe_format internal_format; uint32_t hash; /* _mesa_hash_pointer() on this resource's address. */ - - /* Atomic counter of the number of batches referencing this resource in any - * batch cache. Used for fd_resource_busy(), which needs to check for busy - * in the batch cache from the frontend thread. - */ - uint32_t batch_references; - struct fdl_layout layout; /* buffer range that has been initialized */ @@ -82,6 +130,8 @@ struct fd_resource { /* TODO rename to secondary or auxiliary? */ struct fd_resource *stencil; + struct fd_resource_tracking *track; + simple_mtx_t lock; /* bitmask of state this resource could potentially dirty when rebound, @@ -94,6 +144,8 @@ struct fd_resource { /* Is this buffer a replacement created by threaded_context to avoid * a stall in PIPE_MAP_DISCARD_WHOLE_RESOURCE|PIPE_MAP_WRITE case? + * If so, it no longer "owns" it's rsc->track, and so should not + * invalidate when the rsc is destroyed. 
*/ bool is_replacement : 1; @@ -141,35 +193,17 @@ fd_memory_object(struct pipe_memory_object *pmemobj) } static inline bool -fd_batch_references(struct fd_batch *batch, struct fd_resource *rsc) +pending(struct fd_resource *rsc, bool write) { - return _mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc) != NULL; -} + /* if we have a pending GPU write, we are busy in any case: */ + if (rsc->track->write_batch) + return true; -static inline struct fd_batch * -fd_bc_writer(struct fd_context *ctx, struct fd_resource *rsc) -{ - struct hash_entry *entry = - _mesa_hash_table_search_pre_hashed(ctx->batch_cache.written_resources, rsc->hash, rsc); - if (entry) - return entry->data; - return NULL; -} + /* if CPU wants to write, but we are pending a GPU read, we are busy: */ + if (write && rsc->track->batch_mask) + return true; -static inline bool -pending(struct fd_context *ctx, struct fd_resource *rsc, bool write) assert_dt -{ - if (write) { - foreach_batch (batch, &ctx->batch_cache) { - if (fd_batch_references(batch, rsc)) - return true; - } - } else { - if (fd_bc_writer(ctx, rsc)) - return true; - } - - if (rsc->stencil && pending(ctx, rsc->stencil, write)) + if (rsc->stencil && pending(rsc->stencil, write)) return true; return false; @@ -314,7 +348,7 @@ void fd_resource_screen_init(struct pipe_screen *pscreen); void fd_resource_context_init(struct pipe_context *pctx); uint32_t fd_setup_slices(struct fd_resource *rsc); -void fd_resource_resize(struct fd_context *ctx, struct pipe_resource *prsc, uint32_t sz); +void fd_resource_resize(struct pipe_resource *prsc, uint32_t sz); void fd_replace_buffer_storage(struct pipe_context *ctx, struct pipe_resource *dst, struct pipe_resource *src, @@ -331,6 +365,12 @@ void fd_resource_dump(struct fd_resource *rsc, const char *name); bool fd_render_condition_check(struct pipe_context *pctx) assert_dt; +static inline bool +fd_batch_references_resource(struct fd_batch *batch, struct fd_resource *rsc) +{ + return rsc->track->batch_mask & (1 << batch->idx); +} + static inline void fd_batch_write_prep(struct fd_batch *batch, struct fd_resource *rsc) assert_dt { @@ -348,7 +388,7 @@ fd_batch_resource_read(struct fd_batch *batch, * writing to it (since both _write and _read flush other writers), and * that we've already recursed for stencil. 
*/ - if (unlikely(!fd_batch_references(batch, rsc))) + if (unlikely(!fd_batch_references_resource(batch, rsc))) fd_batch_resource_read_slowpath(batch, rsc); } diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 080d67918c8..15fb442ef89 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -155,6 +155,7 @@ fd_screen_destroy(struct pipe_screen *pscreen) if (screen->ro) screen->ro->destroy(screen->ro); + fd_bc_fini(&screen->batch_cache); fd_gmem_screen_fini(pscreen); slab_destroy_parent(&screen->transfer_pool); @@ -1087,6 +1088,8 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro, if (fd_device_version(dev) >= FD_VERSION_UNLIMITED_CMDS) screen->reorder = !FD_DBG(INORDER); + fd_bc_init(&screen->batch_cache); + list_inithead(&screen->context_list); util_idalloc_mt_init_tc(&screen->buffer_ids); diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h index a9bda74dc30..ca15d3e4a5f 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -41,11 +41,11 @@ #include "util/u_memory.h" #include "util/u_queue.h" +#include "freedreno_batch_cache.h" #include "freedreno_gmem.h" #include "freedreno_util.h" struct fd_bo; -struct fd_resource; /* Potential reasons for needing to skip bypass path and use GMEM, the * generation backend can override this with screen->gmem_reason_mask @@ -135,6 +135,7 @@ struct fd_screen { int64_t cpu_gpu_time_delta; + struct fd_batch_cache batch_cache; struct fd_gmem_cache gmem_cache; bool reorder; diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c index df9c803ff51..5d5b942be39 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.c +++ b/src/gallium/drivers/freedreno/freedreno_state.c @@ -276,13 +276,18 @@ fd_set_framebuffer_state(struct pipe_context *pctx, cso->samples = util_framebuffer_get_num_samples(cso); if (ctx->screen->reorder) { - if (ctx->batch) { - fd_batch_finish_queries(ctx->batch); - fd_batch_reference(&ctx->batch, NULL); - } + struct fd_batch *old_batch = NULL; + fd_batch_reference(&old_batch, ctx->batch); + + if (likely(old_batch)) + fd_batch_finish_queries(old_batch); + + fd_batch_reference(&ctx->batch, NULL); fd_context_all_dirty(ctx); ctx->update_active_queries = true; + + fd_batch_reference(&old_batch, NULL); } else if (ctx->batch) { DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush, framebuffer->cbufs[0], framebuffer->zsbuf);
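
Appended note (not part of the patch above): the revert restores the screen-wide batch cache built around a fixed 32-entry batches[] table plus a uint32_t batch_mask, with foreach_batch() scanning set bits to visit live batches and alloc_batch_locked() claiming the lowest free slot via ffs(). The self-contained C sketch below is only an illustration of that bitmask idiom under made-up names (slot_cache, slot_alloc, bit_scan, foreach_slot are hypothetical stand-ins, not freedreno API); it shows in particular why the loop re-ANDs its scratch mask against the live mask each iteration, as the comment on foreach_batch() in freedreno_batch_cache.h describes.

/*
 * Standalone sketch: fixed-size slot table tracked by a 32-bit mask,
 * mirroring the shape of fd_batch_cache restored by this revert.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <strings.h> /* ffs() */

struct slot_cache {
   void *slots[32];    /* cf. cache->batches[32] */
   uint32_t slot_mask; /* cf. cache->batch_mask */
};

/* Pop the lowest set bit and return its index (like Mesa's u_bit_scan()). */
static int
bit_scan(uint32_t *mask)
{
   int i = ffs(*mask) - 1;
   *mask &= ~(1u << i);
   return i;
}

/* Claim the lowest free slot, as alloc_batch_locked() does to pick batch->idx. */
static int
slot_alloc(struct slot_cache *c, void *item)
{
   int idx = ffs(~c->slot_mask);
   if (idx == 0)
      return -1; /* all 32 slots busy; the driver would flush a batch here */
   idx--;        /* ffs() result is 1-based */
   assert(c->slots[idx] == NULL);
   c->slots[idx] = item;
   c->slot_mask |= (1u << idx);
   return idx;
}

/* Same shape as foreach_batch(batch, cache, mask): copy the mask into _m,
 * scan its bits, and re-AND with the live mask at the end of each pass so
 * entries dropped inside the loop body are not revisited via stale bits.
 */
#define foreach_slot(item, c, mask)                                        \
   for (uint32_t _m = (mask); _m && ((item) = (c)->slots[bit_scan(&_m)]);  \
        _m &= (mask))

int
main(void)
{
   struct slot_cache c = {0};
   int a = 10, b = 20, d = 30;

   slot_alloc(&c, &a);
   slot_alloc(&c, &b);
   slot_alloc(&c, &d);

   int *item;
   foreach_slot (item, &c, c.slot_mask)
      printf("slot entry: %d\n", *item);

   return 0;
}

The fixed 32-bit width is also what lets the restored fd_resource_tracking record its readers with a plain batch_mask (and batch-cache-key users with bc_batch_mask) instead of per-batch hash sets, which is the per-resource state this revert brings back.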