asahi: Lazily initialize batch state on first draw

We track buffers written by batches, but this gets messy when we end up
with an empty batch that is never submitted, since then it might have
taken over writer state from a prior already submitted batch (for its
framebuffers).

Instead of trying to track two tiers of resource writers, let's just
defer initializing batch state until we know we have a draw (or compute
launch, or clear). This means that if a batch is marked as a writer for
a buffer, we know it will not be an empty batch.

This should be a small performance win for empty batches (no need to
emit initial VDM state or run the writer code), but more importantly it
eliminates the empty batch writer state revert corner case.

Signed-off-by: Asahi Lina <lina@asahilina.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22971>
This commit is contained in:
Asahi Lina 2023-05-10 15:57:55 +09:00 committed by Marge Bot
parent f8b055eb96
commit dc1a18b0ed
4 changed files with 46 additions and 46 deletions

View file

@@ -117,26 +117,11 @@ agx_batch_init(struct agx_context *ctx,
batch->clear_stencil = 0;
batch->varyings = 0;
batch->any_draws = false;
batch->initialized = false;
/* We need to emit prim state at the start. Max collides with all. */
batch->reduced_prim = PIPE_PRIM_MAX;
if (batch->key.zsbuf) {
struct agx_resource *rsrc = agx_resource(key->zsbuf->texture);
agx_batch_writes(batch, rsrc);
if (rsrc->separate_stencil)
agx_batch_writes(batch, rsrc->separate_stencil);
}
for (unsigned i = 0; i < key->nr_cbufs; ++i) {
if (key->cbufs[i])
agx_batch_writes(batch, agx_resource(key->cbufs[i]->texture));
}
if (key->width != AGX_COMPUTE_BATCH_WIDTH)
agx_batch_init_state(batch);
if (!batch->syncobj) {
int ret = drmSyncobjCreate(dev->fd, 0, &batch->syncobj);
assert(!ret && batch->syncobj);
@@ -167,36 +152,8 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch, bool reset)
if (reset) {
int handle;
AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
struct agx_bo *bo = agx_lookup_bo(dev, handle);
/* Revert the writer to the last submitted batch that writes this BO,
* if any.
*/
struct agx_batch *writer = agx_writer_get(ctx, handle);
if (writer == batch) {
int i;
assert(bo->writer_syncobj != batch->syncobj);
agx_writer_remove(ctx, handle);
if (bo->writer_syncobj) {
struct agx_batch *old_batch = NULL;
foreach_submitted(ctx, i) {
old_batch = &ctx->batches.slots[i];
if (old_batch->syncobj == bo->writer_syncobj)
break;
else
old_batch = NULL;
}
assume(old_batch);
batch_debug(batch,
"Resetting writer for BO @ 0x%" PRIx64
" to batch %d (syncobj was %d)",
bo->ptr.gpu, agx_batch_idx(old_batch),
bo->writer_syncobj);
agx_writer_add(ctx, agx_batch_idx(old_batch), bo->handle);
}
}
/* We should write no buffers if this is an empty batch */
assert(agx_writer_get(ctx, handle) != batch);
agx_bo_unreference(agx_lookup_bo(dev, handle));
}
@@ -496,6 +453,8 @@ agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc)
struct agx_context *ctx = batch->ctx;
struct agx_batch *writer = agx_writer_get(ctx, rsrc->bo->handle);
assert(batch->initialized);
agx_flush_readers_except(ctx, rsrc, batch, "Write from other batch", false);
/* Nothing to do if we're already writing */
@@ -780,6 +739,8 @@ agx_batch_reset(struct agx_context *ctx, struct agx_batch *batch)
{
batch_debug(batch, "RESET");
assert(!batch->initialized);
/* Reset an empty batch. Like submit, but does nothing. */
agx_batch_mark_submitted(batch);

View file

@@ -1092,6 +1092,9 @@ agx_clear(struct pipe_context *pctx, unsigned buffers,
util_framebuffer_get_num_samples(&ctx->framebuffer) > 1);
}
if (fastclear)
agx_batch_init_state(batch);
batch->clear |= fastclear;
batch->resolve |= buffers;
assert((batch->draw & slowclear) == slowclear);
@@ -1200,6 +1203,8 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
return;
}
assert(batch->initialized);
/* Finalize the encoder */
uint8_t stop[5 + 64] = {0x00, 0x00, 0x00, 0xc0, 0x00};
memcpy(batch->encoder_current, stop, sizeof(stop));

View file

@@ -2065,6 +2065,14 @@ agx_build_meta(struct agx_batch *batch, bool store, bool partial_render)
void
agx_batch_init_state(struct agx_batch *batch)
{
if (batch->initialized)
return;
if (batch->key.width == AGX_COMPUTE_BATCH_WIDTH) {
batch->initialized = true;
return;
}
/* Emit state on the batch that we don't change and so don't dirty track */
uint8_t *out = batch->encoder_current;
struct agx_ppp_update ppp =
@@ -2089,6 +2097,9 @@ agx_batch_init_state(struct agx_batch *batch)
agx_ppp_fini(&out, &ppp);
batch->encoder_current = out;
/* Mark it as initialized now, since agx_batch_writes() will check this. */
batch->initialized = true;
/* Choose a tilebuffer layout given the framebuffer key */
enum pipe_format formats[PIPE_MAX_COLOR_BUFS] = {0};
for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
@@ -2100,6 +2111,19 @@
batch->tilebuffer_layout = agx_build_tilebuffer_layout(
formats, batch->key.nr_cbufs,
util_framebuffer_get_num_samples(&batch->key));
if (batch->key.zsbuf) {
struct agx_resource *rsrc = agx_resource(batch->key.zsbuf->texture);
agx_batch_writes(batch, rsrc);
if (rsrc->separate_stencil)
agx_batch_writes(batch, rsrc->separate_stencil);
}
for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
if (batch->key.cbufs[i])
agx_batch_writes(batch, agx_resource(batch->key.cbufs[i]->texture));
}
}
static enum agx_object_type
@@ -2574,6 +2598,8 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
if (ctx->rast->base.rasterizer_discard && !ctx->streamout.key.active)
return;
agx_batch_init_state(batch);
/* Dirty track the reduced prim: lines vs points vs triangles */
enum pipe_prim_type reduced_prim = u_reduced_prim(info->mode);
if (reduced_prim != batch->reduced_prim)
@@ -2765,6 +2791,13 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
struct agx_context *ctx = agx_context(pipe);
struct agx_batch *batch = agx_get_compute_batch(ctx);
agx_batch_init_state(batch);
/* Consider compute launches as "draws" for the purposes of sanity
* checking batch state.
*/
batch->any_draws = true;
/* To implement load_num_workgroups, the number of workgroups needs to be
* available in GPU memory. This is either the indirect buffer, or just a
* buffer we upload ourselves if not indirect.

View file

@@ -212,6 +212,7 @@ struct agx_batch {
/* PIPE_CLEAR_* bitmask */
uint32_t clear, draw, load, resolve;
bool any_draws;
bool initialized;
uint64_t uploaded_clear_color[PIPE_MAX_COLOR_BUFS];
double clear_depth;