freedreno: nondraw-batch

Allow multiple compute grids to be combined into a single non-draw
batch.  This will allow us to optimize state emit and remove excess
flushing between compute jobs.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20975>
This commit is contained in:
Rob Clark 2022-12-04 12:22:25 -08:00 committed by Marge Bot
parent 0e3f2646dd
commit 2503e22717
6 changed files with 48 additions and 13 deletions

View file

@ -114,9 +114,18 @@ static void
add_flushes(struct pipe_context *pctx, unsigned flushes)
assert_dt
{
struct fd_context *ctx = fd_context(pctx);
struct fd_batch *batch = NULL;
fd_batch_reference(&batch, fd_context(pctx)->batch);
/* If there is an active compute/nondraw batch, that is the one
* we want to add the flushes to. Ie. last op was a launch_grid,
* if the next one is a launch_grid then the barriers should come
* between them. If the next op is a draw_vbo then the batch
* switch is a sufficient barrier so it doesn't really matter.
*/
fd_batch_reference(&batch, ctx->batch_nondraw);
if (!batch)
fd_batch_reference(&batch, ctx->batch);
/* A batch flush is already a sufficient barrier: */
if (!batch)

View file

@ -235,6 +235,8 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
OUT_WFI5(ring);
fd6_cache_flush(ctx->batch, ring);
fd_context_all_clean(ctx);
}
void

View file

@ -365,6 +365,9 @@ batch_flush(struct fd_batch *batch) assert_dt
if (batch == batch->ctx->batch)
fd_batch_reference_locked(&batch->ctx->batch, NULL);
if (batch == batch->ctx->batch_nondraw)
fd_batch_reference_locked(&batch->ctx->batch_nondraw, NULL);
fd_screen_unlock(batch->ctx->screen);
if (batch->fence)

View file

@ -334,6 +334,11 @@ fd_context_batch(struct fd_context *ctx)
tc_assert_driver_thread(ctx->tc);
if (ctx->batch_nondraw) {
fd_batch_reference(&ctx->batch_nondraw, NULL);
fd_context_all_dirty(ctx);
}
fd_batch_reference(&batch, ctx->batch);
if (unlikely(!batch)) {
@ -369,6 +374,28 @@ fd_context_batch_locked(struct fd_context *ctx)
return batch;
}
/**
 * Return a reference to the current non-draw (compute/blit) batch,
 * allocating one on demand if none is currently active.
 */
struct fd_batch *
fd_context_batch_nondraw(struct fd_context *ctx)
{
   struct fd_batch *ret = NULL;

   tc_assert_driver_thread(ctx->tc);

   fd_batch_reference(&ret, ctx->batch_nondraw);

   if (unlikely(!ret)) {
      /* No active nondraw batch: create one and make it current. */
      ret = fd_bc_alloc_batch(ctx, true);
      fd_batch_reference(&ctx->batch_nondraw, ret);
      /* All state must be re-emitted into the fresh batch: */
      fd_context_all_dirty(ctx);
   }

   fd_context_switch_to(ctx, ret);

   return ret;
}
void
fd_context_destroy(struct pipe_context *pctx)
{

View file

@ -299,6 +299,10 @@ struct fd_context {
*/
struct fd_batch *batch dt;
/* Current nondraw batch. Rules are the same as for draw batch.
*/
struct fd_batch *batch_nondraw dt;
/* NULL if there has been rendering since last flush. Otherwise
* keeps a reference to the last fence so we can re-use it rather
* than having to flush no-op batch.
@ -661,13 +665,6 @@ fd_context_all_clean(struct fd_context *ctx) assert_dt
ctx->dirty = (enum fd_dirty_3d_state)0;
ctx->gen_dirty = 0;
for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
/* don't mark compute state as clean, since it is not emitted
* during normal draw call. The places that call _all_dirty(),
* it is safe to mark compute state dirty as well, but the
* inverse is not true.
*/
if (i == PIPE_SHADER_COMPUTE)
continue;
ctx->dirty_shader[i] = (enum fd_dirty_shader_state)0;
}
}
@ -711,6 +708,7 @@ void fd_context_switch_to(struct fd_context *ctx,
struct fd_batch *batch) assert_dt;
struct fd_batch *fd_context_batch(struct fd_context *ctx) assert_dt;
struct fd_batch *fd_context_batch_locked(struct fd_context *ctx) assert_dt;
struct fd_batch *fd_context_batch_nondraw(struct fd_context *ctx) assert_dt;
void fd_context_setup_common_vbos(struct fd_context *ctx);
void fd_context_cleanup_common_vbos(struct fd_context *ctx);

View file

@ -524,10 +524,9 @@ fd_launch_grid(struct pipe_context *pctx,
&ctx->shaderbuf[PIPE_SHADER_COMPUTE];
struct fd_batch *batch, *save_batch = NULL;
batch = fd_bc_alloc_batch(ctx, true);
batch = fd_context_batch_nondraw(ctx);
fd_batch_reference(&save_batch, ctx->batch);
fd_batch_reference(&ctx->batch, batch);
fd_context_all_dirty(ctx);
fd_screen_lock(ctx->screen);
@ -579,10 +578,7 @@ fd_launch_grid(struct pipe_context *pctx,
fd_batch_needs_flush(batch);
ctx->launch_grid(ctx, info);
fd_batch_flush(batch);
fd_batch_reference(&ctx->batch, save_batch);
fd_context_all_dirty(ctx);
fd_batch_reference(&save_batch, NULL);
fd_batch_reference(&batch, NULL);
}