nir,asahi,hk: add barrier argument to MESA_DISPATCH_PRECOMP

In the current API, precomp implicitly assumes full barriers both before & after
every dispatch. That's not good for performance. However, dropping the barriers
and requiring user to explicitly call barrier functions before/after would have
bad ergonomics.

So, we add a new parameter to the standard MESA_DISPATCH_PRECOMP signature
representing the barriers required around the dispatch. As usual, the actual
type & semantics are left to drivers to define what makes sense for their
hardware. We just reserve the place for it. (I think most drivers will want
bitflags here, but I don't think the actual flags are worth standardizing. If a
driver wanted to use a struct here, that would work too.)

Since the asahi stack doesn't do anything clever with barriers yet, we
mechanically add an AGX_BARRIER_ALL barrier to all precomp users in-tree. We can
optimize that later, this just gets the flag-day change in with no functional
change.

For JM panfrost, this will provide a convenient place to stash both their "job
barrier" bit and their "suppress prefetch" bit (which is really a sort of
barrier / cache flush, if you think about it).

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32980>
This commit is contained in:
Alyssa Rosenzweig 2025-01-10 13:19:22 -05:00 committed by Marge Bot
parent 4955a68a03
commit 401b400de3
10 changed files with 86 additions and 68 deletions

View file

@ -260,20 +260,21 @@ agx_fill_decompress_args(struct ail_layout *layout, unsigned layer,
} }
#undef libagx_decompress #undef libagx_decompress
#define libagx_decompress(context, grid, layout, layer, level, ptr, images) \ #define libagx_decompress(context, grid, barrier, layout, layer, level, ptr, \
images) \
libagx_decompress_struct( \ libagx_decompress_struct( \
context, grid, \ context, grid, barrier, \
agx_fill_decompress_args(layout, layer, level, ptr, images), \ agx_fill_decompress_args(layout, layer, level, ptr, images), \
util_logbase2(layout->sample_count_sa)) util_logbase2(layout->sample_count_sa))
#define libagx_tessellate(context, grid, prim, mode, state) \ #define libagx_tessellate(context, grid, barrier, prim, mode, state) \
if (prim == TESS_PRIMITIVE_QUADS) { \ if (prim == TESS_PRIMITIVE_QUADS) { \
libagx_tess_quad(context, grid, state, mode); \ libagx_tess_quad(context, grid, barrier, state, mode); \
} else if (prim == TESS_PRIMITIVE_TRIANGLES) { \ } else if (prim == TESS_PRIMITIVE_TRIANGLES) { \
libagx_tess_tri(context, grid, state, mode); \ libagx_tess_tri(context, grid, barrier, state, mode); \
} else { \ } else { \
assert(prim == TESS_PRIMITIVE_ISOLINES); \ assert(prim == TESS_PRIMITIVE_ISOLINES); \
libagx_tess_isoline(context, grid, state, mode); \ libagx_tess_isoline(context, grid, barrier, state, mode); \
} }
struct agx_border_packed; struct agx_border_packed;

View file

@ -709,7 +709,8 @@ hk_upload_usc_words(struct hk_cmd_buffer *cmd, struct hk_shader *s,
void void
hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid grid, hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid grid,
enum libagx_program idx, void *data, size_t data_size) enum agx_barrier barrier, enum libagx_program idx,
void *data, size_t data_size)
{ {
struct hk_device *dev = hk_cmd_buffer_device(cs->cmd); struct hk_device *dev = hk_cmd_buffer_device(cs->cmd);
struct agx_precompiled_shader *prog = agx_get_precompiled(&dev->bg_eot, idx); struct agx_precompiled_shader *prog = agx_get_precompiled(&dev->bg_eot, idx);

View file

@ -803,8 +803,9 @@ hk_dispatch_with_local_size(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
hk_dispatch_with_usc(dev, cs, &s->b.info, usc, grid, local_size); hk_dispatch_with_usc(dev, cs, &s->b.info, usc, grid, local_size);
} }
void hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid gird, void hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid grid,
enum libagx_program idx, void *data, size_t data_size); enum agx_barrier barrier, enum libagx_program idx,
void *data, size_t data_size);
#define MESA_DISPATCH_PRECOMP hk_dispatch_precomp #define MESA_DISPATCH_PRECOMP hk_dispatch_precomp

View file

@ -103,8 +103,8 @@ dispatch(struct hk_cmd_buffer *cmd, struct agx_grid grid)
perf_debug(dev, "CS invocation statistic"); perf_debug(dev, "CS invocation statistic");
uint64_t grid = cmd->state.cs.descriptors.root.cs.group_count_addr; uint64_t grid = cmd->state.cs.descriptors.root.cs.group_count_addr;
libagx_increment_cs_invocations(cs, agx_1d(1), grid, stat, libagx_increment_cs_invocations(cs, agx_1d(1), grid, AGX_BARRIER_ALL,
agx_workgroup_threads(local_size)); stat, agx_workgroup_threads(local_size));
} }
hk_ensure_cs_has_space(cmd, cs, 0x2000 /* TODO */); hk_ensure_cs_has_space(cmd, cs, 0x2000 /* TODO */);

View file

@ -861,7 +861,8 @@ hk_CmdBeginRendering(VkCommandBuffer commandBuffer,
agx_3d(ail_metadata_width_tl(layout, level) * 32, agx_3d(ail_metadata_width_tl(layout, level) * 32,
ail_metadata_height_tl(layout, level), layer_count); ail_metadata_height_tl(layout, level), layer_count);
libagx_decompress(cs, grid, layout, layer, level, base, libagx_decompress(cs, grid, AGX_BARRIER_ALL, layout, layer,
level, base,
hk_pool_upload(cmd, &imgs, sizeof(imgs), 64)); hk_pool_upload(cmd, &imgs, sizeof(imgs), 64));
} }
} }
@ -1389,8 +1390,8 @@ hk_draw_without_restart(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
.zero_sink = dev->rodata.zero_sink, .zero_sink = dev->rodata.zero_sink,
}; };
libagx_unroll_restart_struct(cs, agx_1d(1024 * draw_count), ia, libagx_unroll_restart_struct(cs, agx_1d(1024 * draw_count), AGX_BARRIER_ALL,
draw.index_size, libagx_compact_prim(prim)); ia, draw.index_size, libagx_compact_prim(prim));
return agx_draw_indexed_indirect(ia.out_draw, dev->heap->va->addr, return agx_draw_indexed_indirect(ia.out_draw, dev->heap->va->addr,
dev->heap->size, draw.index_size, dev->heap->size, draw.index_size,
@ -1460,7 +1461,7 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
gsi.index_buffer_range_el = agx_draw_index_range_el(draw); gsi.index_buffer_range_el = agx_draw_index_range_el(draw);
} }
libagx_gs_setup_indirect_struct(cs, agx_1d(1), gsi); libagx_gs_setup_indirect_struct(cs, agx_1d(1), AGX_BARRIER_ALL, gsi);
grid_vs = agx_grid_indirect( grid_vs = agx_grid_indirect(
geometry_params + offsetof(struct agx_geometry_params, vs_grid)); geometry_params + offsetof(struct agx_geometry_params, vs_grid));
@ -1486,7 +1487,8 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
hk_dispatch_with_local_size(cmd, cs, count, grid_gs, hk_dispatch_with_local_size(cmd, cs, count, grid_gs,
agx_workgroup(1, 1, 1)); agx_workgroup(1, 1, 1));
libagx_prefix_sum_geom(cs, agx_1d(1024 * count_words), geometry_params); libagx_prefix_sum_geom(cs, agx_1d(1024 * count_words), AGX_BARRIER_ALL,
geometry_params);
} }
/* Pre-GS shader */ /* Pre-GS shader */
@ -1549,7 +1551,7 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
args.in_index_buffer_range_el = agx_draw_index_range_el(draw); args.in_index_buffer_range_el = agx_draw_index_range_el(draw);
} }
libagx_tess_setup_indirect_struct(cs, agx_1d(1), args); libagx_tess_setup_indirect_struct(cs, agx_1d(1), AGX_BARRIER_ALL, args);
uint32_t grid_stride = sizeof(uint32_t) * 6; uint32_t grid_stride = sizeof(uint32_t) * 6;
grid_vs = agx_grid_indirect_local(gfx->tess.grids + 0 * grid_stride); grid_vs = agx_grid_indirect_local(gfx->tess.grids + 0 * grid_stride);
@ -1565,7 +1567,8 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
/* TCS invocation counter increments once per-patch */ /* TCS invocation counter increments once per-patch */
if (tcs_stat) { if (tcs_stat) {
perf_debug(dev, "Direct TCS statistic"); perf_debug(dev, "Direct TCS statistic");
libagx_increment_statistic(cs, agx_1d(1), tcs_stat, patches); libagx_increment_statistic(cs, agx_1d(1), AGX_BARRIER_ALL, tcs_stat,
patches);
} }
} }
@ -1583,10 +1586,13 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
grid_tcs, agx_workgroup(tcs->info.tess.tcs_output_patch_size, 1, 1)); grid_tcs, agx_workgroup(tcs->info.tess.tcs_output_patch_size, 1, 1));
/* First generate counts, then prefix sum them, and then tessellate. */ /* First generate counts, then prefix sum them, and then tessellate. */
libagx_tessellate(cs, grid_tess, info.mode, LIBAGX_TESS_MODE_COUNT, state); libagx_tessellate(cs, grid_tess, AGX_BARRIER_ALL, info.mode,
libagx_prefix_sum_tess(cs, agx_1d(1024), state); LIBAGX_TESS_MODE_COUNT, state);
libagx_tessellate(cs, grid_tess, info.mode, LIBAGX_TESS_MODE_WITH_COUNTS,
state); libagx_prefix_sum_tess(cs, agx_1d(1024), AGX_BARRIER_ALL, state);
libagx_tessellate(cs, grid_tess, AGX_BARRIER_ALL, info.mode,
LIBAGX_TESS_MODE_WITH_COUNTS, state);
return agx_draw_indexed_indirect(gfx->tess.out_draws, dev->heap->va->addr, return agx_draw_indexed_indirect(gfx->tess.out_draws, dev->heap->va->addr,
dev->heap->size, AGX_INDEX_SIZE_U32, false); dev->heap->size, AGX_INDEX_SIZE_U32, false);
@ -3358,12 +3364,13 @@ hk_ia_update(struct hk_cmd_buffer *cmd, struct hk_cs *cs, struct agx_draw draw,
uint32_t index_size_B = agx_index_size_to_B(draw.index_size); uint32_t index_size_B = agx_index_size_to_B(draw.index_size);
libagx_increment_ia_restart( libagx_increment_ia_restart(
cs, agx_1d(1024), ia_vertices, ia_prims, vs_invocations, c_prims, cs, agx_1d(1024), AGX_BARRIER_ALL, ia_vertices, ia_prims,
c_inv, draw_ptr, draw.index_buffer, agx_draw_index_range_el(draw), vs_invocations, c_prims, c_inv, draw_ptr, draw.index_buffer,
cmd->state.gfx.index.restart, index_size_B, prim); agx_draw_index_range_el(draw), cmd->state.gfx.index.restart,
index_size_B, prim);
} else { } else {
libagx_increment_ia(cs, agx_1d(1), ia_vertices, ia_prims, vs_invocations, libagx_increment_ia(cs, agx_1d(1), AGX_BARRIER_ALL, ia_vertices, ia_prims,
c_prims, c_inv, draw_ptr, prim); vs_invocations, c_prims, c_inv, draw_ptr, prim);
} }
} }
@ -3476,7 +3483,7 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
struct hk_descriptor_state *desc = &cmd->state.gfx.descriptors; struct hk_descriptor_state *desc = &cmd->state.gfx.descriptors;
libagx_draw_without_adj( libagx_draw_without_adj(
ccs, agx_1d(1), out_draw, draw.b.ptr, ccs, agx_1d(1), AGX_BARRIER_ALL, out_draw, draw.b.ptr,
desc->root.draw.input_assembly, draw.index_buffer, desc->root.draw.input_assembly, draw.index_buffer,
draw.indexed ? agx_draw_index_range_el(draw) : 0, draw.indexed ? agx_draw_index_range_el(draw) : 0,
draw.indexed ? agx_index_size_to_B(draw.index_size) : 0, prim); draw.indexed ? agx_index_size_to_B(draw.index_size) : 0, prim);
@ -3503,7 +3510,7 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
size_t size_B = libagx_draw_robust_index_vdm_size(); size_t size_B = libagx_draw_robust_index_vdm_size();
uint64_t target = hk_cs_alloc_for_indirect(cs, size_B); uint64_t target = hk_cs_alloc_for_indirect(cs, size_B);
libagx_draw_robust_index(ccs, agx_1d(32), target, libagx_draw_robust_index(ccs, agx_1d(32), AGX_BARRIER_ALL, target,
hk_geometry_state(cmd), draw.b.ptr, hk_geometry_state(cmd), draw.b.ptr,
draw.index_buffer, draw.index_buffer_range_B, draw.index_buffer, draw.index_buffer_range_B,
draw.restart, topology, draw.index_size); draw.restart, topology, draw.index_size);
@ -3728,8 +3735,8 @@ hk_draw_indirect_count(VkCommandBuffer commandBuffer, VkBuffer _buffer,
uint64_t in = hk_buffer_address(buffer, offset); uint64_t in = hk_buffer_address(buffer, offset);
uint64_t count_addr = hk_buffer_address(count_buffer, countBufferOffset); uint64_t count_addr = hk_buffer_address(count_buffer, countBufferOffset);
libagx_predicate_indirect(cs, agx_1d(maxDrawCount), patched, in, count_addr, libagx_predicate_indirect(cs, agx_1d(maxDrawCount), AGX_BARRIER_ALL, patched,
stride / 4, indexed); in, count_addr, stride / 4, indexed);
if (indexed) { if (indexed) {
hk_draw_indexed_indirect_inner(commandBuffer, patched, maxDrawCount, hk_draw_indexed_indirect_inner(commandBuffer, patched, maxDrawCount,
@ -3847,7 +3854,7 @@ hk_begin_end_xfb(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer,
if (copies > 0) { if (copies > 0) {
perf_debug(dev, "XFB counter copy"); perf_debug(dev, "XFB counter copy");
libagx_copy_xfb_counters(cs, agx_1d(copies), libagx_copy_xfb_counters(cs, agx_1d(copies), AGX_BARRIER_ALL,
hk_pool_upload(cmd, &params, sizeof(params), 8)); hk_pool_upload(cmd, &params, sizeof(params), 8));
} }
} }

View file

@ -268,7 +268,7 @@ hk_dispatch_imm_writes(struct hk_cmd_buffer *cmd, struct hk_cs *cs)
util_dynarray_num_elements(&cs->imm_writes, struct libagx_imm_write); util_dynarray_num_elements(&cs->imm_writes, struct libagx_imm_write);
assert(count > 0); assert(count > 0);
libagx_write_u32s(cs, agx_1d(count), params); libagx_write_u32s(cs, agx_1d(count), AGX_BARRIER_ALL, params);
} }
void void
@ -305,7 +305,7 @@ hk_queue_write(struct hk_cmd_buffer *cmd, uint64_t address, uint32_t value,
hk_cdm_cache_flush(dev, cs); hk_cdm_cache_flush(dev, cs);
perf_debug(dev, "Queued write"); perf_debug(dev, "Queued write");
libagx_write_u32(cs, agx_1d(1), address, value); libagx_write_u32(cs, agx_1d(1), AGX_BARRIER_ALL, address, value);
} }
/** /**
@ -433,7 +433,7 @@ hk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
if (!after) if (!after)
return; return;
libagx_copy_timestamp(after, agx_1d(1), report_addr, libagx_copy_timestamp(after, agx_1d(1), AGX_BARRIER_ALL, report_addr,
cs->timestamp.end.addr); cs->timestamp.end.addr);
} else { } else {
cs->timestamp.end = (struct agx_timestamp_req){ cs->timestamp.end = (struct agx_timestamp_req){
@ -702,5 +702,5 @@ hk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
.with_availability = flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT, .with_availability = flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT,
}; };
libagx_copy_query_struct(cs, agx_1d(queryCount), info); libagx_copy_query_struct(cs, agx_1d(queryCount), AGX_BARRIER_ALL, info);
} }

View file

@ -104,14 +104,15 @@
* implement that mechanism, a driver must implement the following function * implement that mechanism, a driver must implement the following function
* signature: * signature:
* *
* MESA_DISPATCH_PRECOMP(context, grid, kernel index, argument pointer, * MESA_DISPATCH_PRECOMP(context, grid, barrier, kernel index,
* size of arguments) * argument pointer, size of arguments)
* *
* The exact types used are determined by the driver. context is something like * The exact types used are determined by the driver. context is something like
* a Vulkan command buffer. grid represents the 3D dispatch size. kernel index * a Vulkan command buffer. grid represents the 3D dispatch size. barrier
* is the index of the precompiled kernel (nir_precomp_index). argument pointer * describes the synchronization and cache flushing required before and after
* is a host pointer to the sized argument structure, which the driver must * the dispatch. kernel index is the index of the precompiled kernel
* upload and bind (e.g. as push constants). * (nir_precomp_index). argument pointer is a host pointer to the sized argument
* structure, which the driver must upload and bind (e.g. as push constants).
* *
* Because the types are ambiguous here, the same mechanism works for both * Because the types are ambiguous here, the same mechanism works for both
* Gallium and Vulkan drivers. * Gallium and Vulkan drivers.
@ -479,7 +480,7 @@ nir_precomp_print_dispatch_macros(FILE *fp, const struct nir_precomp_opts *opt,
for (unsigned i = 0; i < 2; ++i) { for (unsigned i = 0; i < 2; ++i) {
bool is_struct = i == 0; bool is_struct = i == 0;
fprintf(fp, "#define %s%s(_context, _grid%s", func->name, fprintf(fp, "#define %s%s(_context, _grid, _barrier%s", func->name,
is_struct ? "_struct" : "", is_struct ? ", _data" : ""); is_struct ? "_struct" : "", is_struct ? ", _data" : "");
/* Add the arguments, including variant parameters. For struct macros, /* Add the arguments, including variant parameters. For struct macros,
@ -523,7 +524,7 @@ nir_precomp_print_dispatch_macros(FILE *fp, const struct nir_precomp_opts *opt,
/* Dispatch via MESA_DISPATCH_PRECOMP, which the driver must #define /* Dispatch via MESA_DISPATCH_PRECOMP, which the driver must #define
* suitably before #include-ing this file. * suitably before #include-ing this file.
*/ */
fprintf(fp, " MESA_DISPATCH_PRECOMP(_context, _grid, "); fprintf(fp, " MESA_DISPATCH_PRECOMP(_context, _grid, _barrier, ");
nir_precomp_print_enum_value(fp, func); nir_precomp_print_enum_value(fp, func);
nir_precomp_print_variant_params(fp, func, false); nir_precomp_print_variant_params(fp, func, false);
fprintf(fp, ", &_args, sizeof(_args)); \\\n"); fprintf(fp, ", &_args, sizeof(_args)); \\\n");

View file

@ -15,6 +15,7 @@
#include "agx_device.h" #include "agx_device.h"
#include "agx_state.h" #include "agx_state.h"
#include "libagx.h" #include "libagx.h"
#include "libagx_dgc.h"
#include "libagx_shaders.h" #include "libagx_shaders.h"
static bool static bool
@ -500,7 +501,7 @@ agx_get_query_result_resource_gpu(struct agx_context *ctx,
: copy_type == QUERY_COPY_BOOL32 ? 4 : copy_type == QUERY_COPY_BOOL32 ? 4
: 0; : 0;
libagx_copy_query_gl(batch, agx_1d(1), query->ptr.gpu, libagx_copy_query_gl(batch, agx_1d(1), AGX_BARRIER_ALL, query->ptr.gpu,
rsrc->bo->va->addr + offset, result_type, bool_size); rsrc->bo->va->addr + offset, result_type, bool_size);
return true; return true;
} }

View file

@ -3103,7 +3103,8 @@ agx_launch_internal(struct agx_batch *batch, struct agx_grid grid,
void void
agx_launch_precomp(struct agx_batch *batch, struct agx_grid grid, agx_launch_precomp(struct agx_batch *batch, struct agx_grid grid,
enum libagx_program program, void *args, size_t arg_size) enum agx_barrier barrier, enum libagx_program program,
void *args, size_t arg_size)
{ {
struct agx_device *dev = agx_device(batch->ctx->base.screen); struct agx_device *dev = agx_device(batch->ctx->base.screen);
struct agx_precompiled_shader *cs = struct agx_precompiled_shader *cs =
@ -3935,14 +3936,15 @@ agx_ia_update(struct agx_batch *batch, const struct pipe_draw_info *info,
perf_debug(dev, "Input assembly counters with primitive restart"); perf_debug(dev, "Input assembly counters with primitive restart");
libagx_increment_ia_restart( libagx_increment_ia_restart(
batch, agx_1d(1024), ia_vertices, ia_primitives, vs_invocations, batch, agx_1d(1024), AGX_BARRIER_ALL, ia_vertices, ia_primitives,
c_prims, c_invs, draw, ib, ib_range_el, info->restart_index, vs_invocations, c_prims, c_invs, draw, ib, ib_range_el,
info->index_size, info->mode); info->restart_index, info->index_size, info->mode);
} else { } else {
perf_debug(dev, "Input assembly counters"); perf_debug(dev, "Input assembly counters");
libagx_increment_ia(batch, agx_1d(1), ia_vertices, ia_primitives, libagx_increment_ia(batch, agx_1d(1), AGX_BARRIER_ALL, ia_vertices,
vs_invocations, c_prims, c_invs, draw, info->mode); ia_primitives, vs_invocations, c_prims, c_invs, draw,
info->mode);
} }
} }
@ -4146,7 +4148,7 @@ agx_launch_gs_prerast(struct agx_batch *batch,
.prim = info->mode, .prim = info->mode,
}; };
libagx_gs_setup_indirect_struct(batch, agx_1d(1), gsi); libagx_gs_setup_indirect_struct(batch, agx_1d(1), AGX_BARRIER_ALL, gsi);
wg = agx_workgroup(1, 1, 1); wg = agx_workgroup(1, 1, 1);
grid_vs = grid_vs =
@ -4172,7 +4174,8 @@ agx_launch_gs_prerast(struct agx_batch *batch,
agx_launch(batch, grid_gs, wg, gs->gs_count, NULL, PIPE_SHADER_GEOMETRY, agx_launch(batch, grid_gs, wg, gs->gs_count, NULL, PIPE_SHADER_GEOMETRY,
0); 0);
libagx_prefix_sum_geom(batch, agx_1d(1024 * gs->gs_count_words), gp); libagx_prefix_sum_geom(batch, agx_1d(1024 * gs->gs_count_words),
AGX_BARRIER_ALL, gp);
} }
/* Pre-GS shader */ /* Pre-GS shader */
@ -4243,9 +4246,9 @@ agx_draw_without_restart(struct agx_batch *batch,
}; };
/* Unroll the index buffer for each draw */ /* Unroll the index buffer for each draw */
libagx_unroll_restart_struct(batch, agx_1d(1024 * indirect->draw_count), libagx_unroll_restart_struct(
unroll, util_logbase2(info->index_size), batch, agx_1d(1024 * indirect->draw_count), AGX_BARRIER_ALL, unroll,
libagx_compact_prim(info->mode)); util_logbase2(info->index_size), libagx_compact_prim(info->mode));
/* Now draw the results without restart */ /* Now draw the results without restart */
struct pipe_draw_info new_info = { struct pipe_draw_info new_info = {
@ -4675,10 +4678,10 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
uint64_t grids = uint64_t grids =
agx_pool_alloc_aligned(&batch->pool, grid_stride * 3, 4).gpu; agx_pool_alloc_aligned(&batch->pool, grid_stride * 3, 4).gpu;
libagx_tess_setup_indirect(batch, agx_1d(1), state, grids, libagx_tess_setup_indirect(
0 /* XXX: IA */, indirect_ptr, vertex_out_ptr, batch, agx_1d(1), AGX_BARRIER_ALL, state, grids, 0 /* XXX: IA */,
0, 0, 0 /* XXX: Index buffer */, indirect_ptr, vertex_out_ptr, 0, 0, 0 /* XXX: Index buffer */,
ctx->vs->b.info.outputs, tcs_statistic); ctx->vs->b.info.outputs, tcs_statistic);
batch->uniforms.vertex_output_buffer_ptr = vertex_out_ptr; batch->uniforms.vertex_output_buffer_ptr = vertex_out_ptr;
@ -4698,10 +4701,11 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
batch->uniforms.vertex_output_buffer_ptr = 0; batch->uniforms.vertex_output_buffer_ptr = 0;
/* Generate counts, then prefix sum them, then finally tessellate. */ /* Generate counts, then prefix sum them, then finally tessellate. */
libagx_tessellate(batch, tess_grid, mode, LIBAGX_TESS_MODE_COUNT, state); libagx_tessellate(batch, tess_grid, AGX_BARRIER_ALL, mode,
libagx_prefix_sum_tess(batch, agx_1d(1024), state); LIBAGX_TESS_MODE_COUNT, state);
libagx_tessellate(batch, tess_grid, mode, LIBAGX_TESS_MODE_WITH_COUNTS, libagx_prefix_sum_tess(batch, agx_1d(1024), AGX_BARRIER_ALL, state);
state); libagx_tessellate(batch, tess_grid, AGX_BARRIER_ALL, mode,
LIBAGX_TESS_MODE_WITH_COUNTS, state);
/* Face culling state needs to be specialized for tess */ /* Face culling state needs to be specialized for tess */
ctx->dirty |= AGX_DIRTY_RS; ctx->dirty |= AGX_DIRTY_RS;
@ -5307,7 +5311,8 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
if (indirect) { if (indirect) {
uint64_t addr = agx_get_query_address(batch, statistic); uint64_t addr = agx_get_query_address(batch, statistic);
libagx_increment_cs_invocations(batch, agx_1d(1), indirect, addr, libagx_increment_cs_invocations(batch, agx_1d(1), AGX_BARRIER_ALL,
indirect, addr,
agx_workgroup_threads(wg)); agx_workgroup_threads(wg));
} else { } else {
agx_query_increment_cpu(ctx, statistic, agx_query_increment_cpu(ctx, statistic,
@ -5435,7 +5440,8 @@ agx_decompress_inplace(struct agx_batch *batch, struct pipe_surface *surf,
ail_metadata_height_tl(layout, level), ail_metadata_height_tl(layout, level),
surf->u.tex.last_layer - surf->u.tex.first_layer + 1); surf->u.tex.last_layer - surf->u.tex.first_layer + 1);
libagx_decompress(batch, grid, layout, surf->u.tex.first_layer, level, libagx_decompress(batch, grid, AGX_BARRIER_ALL, layout,
surf->u.tex.first_layer, level,
agx_map_texture_gpu(rsrc, 0), images.gpu); agx_map_texture_gpu(rsrc, 0), images.gpu);
} }

View file

@ -796,8 +796,8 @@ void agx_launch(struct agx_batch *batch, struct agx_grid grid,
unsigned variable_shared_mem); unsigned variable_shared_mem);
void agx_launch_precomp(struct agx_batch *batch, struct agx_grid grid, void agx_launch_precomp(struct agx_batch *batch, struct agx_grid grid,
enum libagx_program program, void *args, enum agx_barrier barrier, enum libagx_program program,
size_t arg_size); void *args, size_t arg_size);
#define MESA_DISPATCH_PRECOMP agx_launch_precomp #define MESA_DISPATCH_PRECOMP agx_launch_precomp