From 401b400de3f2d84bfb3436b7fed0fa90cf70b8e4 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 10 Jan 2025 13:19:22 -0500 Subject: [PATCH] nir,asahi,hk: add barrier argument to MESA_DISPATCH_PRECOMP In the current API, precomp implicitly assumes full barriers both before & after every dispatch. That's not good for performance. However, dropping the barriers and requiring users to explicitly call barrier functions before/after would have bad ergonomics. So, we add a new parameter to the standard MESA_DISPATCH_PRECOMP signature representing the barriers required around the dispatch. As usual, the actual type & semantics are left to drivers to define as makes sense for their hardware. We just reserve the place for it. (I think most drivers will want bitflags here, but I don't think the actual flags are worth standardizing. If a driver wanted to use a struct here, that would work too.) Since the asahi stack doesn't do anything clever with barriers yet, we mechanically add an AGX_BARRIER_ALL barrier to all precomp users in-tree. We can optimize that later; this just gets the flag-day change in with no functional change. For JM panfrost, this will provide a convenient place to stash both their "job barrier" bit and their "suppress prefetch" bit (which is really a sort of barrier / cache flush, if you think about it). 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/agx_helpers.h | 13 +++---- src/asahi/vulkan/hk_cmd_buffer.c | 3 +- src/asahi/vulkan/hk_cmd_buffer.h | 5 +-- src/asahi/vulkan/hk_cmd_dispatch.c | 4 +-- src/asahi/vulkan/hk_cmd_draw.c | 49 +++++++++++++++------------ src/asahi/vulkan/hk_query_pool.c | 8 ++--- src/compiler/nir/nir_precompiled.h | 17 +++++----- src/gallium/drivers/asahi/agx_query.c | 3 +- src/gallium/drivers/asahi/agx_state.c | 48 ++++++++++++++------------ src/gallium/drivers/asahi/agx_state.h | 4 +-- 10 files changed, 86 insertions(+), 68 deletions(-) diff --git a/src/asahi/lib/agx_helpers.h b/src/asahi/lib/agx_helpers.h index b80208c83c1..d7a711a2b39 100644 --- a/src/asahi/lib/agx_helpers.h +++ b/src/asahi/lib/agx_helpers.h @@ -260,20 +260,21 @@ agx_fill_decompress_args(struct ail_layout *layout, unsigned layer, } #undef libagx_decompress -#define libagx_decompress(context, grid, layout, layer, level, ptr, images) \ +#define libagx_decompress(context, grid, barrier, layout, layer, level, ptr, \ + images) \ libagx_decompress_struct( \ - context, grid, \ + context, grid, barrier, \ agx_fill_decompress_args(layout, layer, level, ptr, images), \ util_logbase2(layout->sample_count_sa)) -#define libagx_tessellate(context, grid, prim, mode, state) \ +#define libagx_tessellate(context, grid, barrier, prim, mode, state) \ if (prim == TESS_PRIMITIVE_QUADS) { \ - libagx_tess_quad(context, grid, state, mode); \ + libagx_tess_quad(context, grid, barrier, state, mode); \ } else if (prim == TESS_PRIMITIVE_TRIANGLES) { \ - libagx_tess_tri(context, grid, state, mode); \ + libagx_tess_tri(context, grid, barrier, state, mode); \ } else { \ assert(prim == TESS_PRIMITIVE_ISOLINES); \ - libagx_tess_isoline(context, grid, state, mode); \ + libagx_tess_isoline(context, grid, barrier, state, mode); \ } struct agx_border_packed; diff --git a/src/asahi/vulkan/hk_cmd_buffer.c b/src/asahi/vulkan/hk_cmd_buffer.c index 7f6ef6978d2..0d5862a48f4 100644 --- 
a/src/asahi/vulkan/hk_cmd_buffer.c +++ b/src/asahi/vulkan/hk_cmd_buffer.c @@ -709,7 +709,8 @@ hk_upload_usc_words(struct hk_cmd_buffer *cmd, struct hk_shader *s, void hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid grid, - enum libagx_program idx, void *data, size_t data_size) + enum agx_barrier barrier, enum libagx_program idx, + void *data, size_t data_size) { struct hk_device *dev = hk_cmd_buffer_device(cs->cmd); struct agx_precompiled_shader *prog = agx_get_precompiled(&dev->bg_eot, idx); diff --git a/src/asahi/vulkan/hk_cmd_buffer.h b/src/asahi/vulkan/hk_cmd_buffer.h index 6e50279c4d6..87f0298695f 100644 --- a/src/asahi/vulkan/hk_cmd_buffer.h +++ b/src/asahi/vulkan/hk_cmd_buffer.h @@ -803,8 +803,9 @@ hk_dispatch_with_local_size(struct hk_cmd_buffer *cmd, struct hk_cs *cs, hk_dispatch_with_usc(dev, cs, &s->b.info, usc, grid, local_size); } -void hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid gird, - enum libagx_program idx, void *data, size_t data_size); +void hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid grid, + enum agx_barrier barrier, enum libagx_program idx, + void *data, size_t data_size); #define MESA_DISPATCH_PRECOMP hk_dispatch_precomp diff --git a/src/asahi/vulkan/hk_cmd_dispatch.c b/src/asahi/vulkan/hk_cmd_dispatch.c index d1131c99280..2d49def6fa0 100644 --- a/src/asahi/vulkan/hk_cmd_dispatch.c +++ b/src/asahi/vulkan/hk_cmd_dispatch.c @@ -103,8 +103,8 @@ dispatch(struct hk_cmd_buffer *cmd, struct agx_grid grid) perf_debug(dev, "CS invocation statistic"); uint64_t grid = cmd->state.cs.descriptors.root.cs.group_count_addr; - libagx_increment_cs_invocations(cs, agx_1d(1), grid, stat, - agx_workgroup_threads(local_size)); + libagx_increment_cs_invocations(cs, agx_1d(1), AGX_BARRIER_ALL, grid, + stat, agx_workgroup_threads(local_size)); } hk_ensure_cs_has_space(cmd, cs, 0x2000 /* TODO */); diff --git a/src/asahi/vulkan/hk_cmd_draw.c b/src/asahi/vulkan/hk_cmd_draw.c index 80d48f7ea5d..e98a87e4f81 100644 --- 
a/src/asahi/vulkan/hk_cmd_draw.c +++ b/src/asahi/vulkan/hk_cmd_draw.c @@ -861,7 +861,8 @@ hk_CmdBeginRendering(VkCommandBuffer commandBuffer, agx_3d(ail_metadata_width_tl(layout, level) * 32, ail_metadata_height_tl(layout, level), layer_count); - libagx_decompress(cs, grid, layout, layer, level, base, + libagx_decompress(cs, grid, AGX_BARRIER_ALL, layout, layer, + level, base, hk_pool_upload(cmd, &imgs, sizeof(imgs), 64)); } } @@ -1389,8 +1390,8 @@ hk_draw_without_restart(struct hk_cmd_buffer *cmd, struct hk_cs *cs, .zero_sink = dev->rodata.zero_sink, }; - libagx_unroll_restart_struct(cs, agx_1d(1024 * draw_count), ia, - draw.index_size, libagx_compact_prim(prim)); + libagx_unroll_restart_struct(cs, agx_1d(1024 * draw_count), AGX_BARRIER_ALL, + ia, draw.index_size, libagx_compact_prim(prim)); return agx_draw_indexed_indirect(ia.out_draw, dev->heap->va->addr, dev->heap->size, draw.index_size, @@ -1460,7 +1461,7 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs, gsi.index_buffer_range_el = agx_draw_index_range_el(draw); } - libagx_gs_setup_indirect_struct(cs, agx_1d(1), gsi); + libagx_gs_setup_indirect_struct(cs, agx_1d(1), AGX_BARRIER_ALL, gsi); grid_vs = agx_grid_indirect( geometry_params + offsetof(struct agx_geometry_params, vs_grid)); @@ -1486,7 +1487,8 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs, hk_dispatch_with_local_size(cmd, cs, count, grid_gs, agx_workgroup(1, 1, 1)); - libagx_prefix_sum_geom(cs, agx_1d(1024 * count_words), geometry_params); + libagx_prefix_sum_geom(cs, agx_1d(1024 * count_words), AGX_BARRIER_ALL, + geometry_params); } /* Pre-GS shader */ @@ -1549,7 +1551,7 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs, args.in_index_buffer_range_el = agx_draw_index_range_el(draw); } - libagx_tess_setup_indirect_struct(cs, agx_1d(1), args); + libagx_tess_setup_indirect_struct(cs, agx_1d(1), AGX_BARRIER_ALL, args); uint32_t grid_stride = sizeof(uint32_t) * 6; grid_vs = 
agx_grid_indirect_local(gfx->tess.grids + 0 * grid_stride); @@ -1565,7 +1567,8 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs, /* TCS invocation counter increments once per-patch */ if (tcs_stat) { perf_debug(dev, "Direct TCS statistic"); - libagx_increment_statistic(cs, agx_1d(1), tcs_stat, patches); + libagx_increment_statistic(cs, agx_1d(1), AGX_BARRIER_ALL, tcs_stat, + patches); } } @@ -1583,10 +1586,13 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs, grid_tcs, agx_workgroup(tcs->info.tess.tcs_output_patch_size, 1, 1)); /* First generate counts, then prefix sum them, and then tessellate. */ - libagx_tessellate(cs, grid_tess, info.mode, LIBAGX_TESS_MODE_COUNT, state); - libagx_prefix_sum_tess(cs, agx_1d(1024), state); - libagx_tessellate(cs, grid_tess, info.mode, LIBAGX_TESS_MODE_WITH_COUNTS, - state); + libagx_tessellate(cs, grid_tess, AGX_BARRIER_ALL, info.mode, + LIBAGX_TESS_MODE_COUNT, state); + + libagx_prefix_sum_tess(cs, agx_1d(1024), AGX_BARRIER_ALL, state); + + libagx_tessellate(cs, grid_tess, AGX_BARRIER_ALL, info.mode, + LIBAGX_TESS_MODE_WITH_COUNTS, state); return agx_draw_indexed_indirect(gfx->tess.out_draws, dev->heap->va->addr, dev->heap->size, AGX_INDEX_SIZE_U32, false); @@ -3358,12 +3364,13 @@ hk_ia_update(struct hk_cmd_buffer *cmd, struct hk_cs *cs, struct agx_draw draw, uint32_t index_size_B = agx_index_size_to_B(draw.index_size); libagx_increment_ia_restart( - cs, agx_1d(1024), ia_vertices, ia_prims, vs_invocations, c_prims, - c_inv, draw_ptr, draw.index_buffer, agx_draw_index_range_el(draw), - cmd->state.gfx.index.restart, index_size_B, prim); + cs, agx_1d(1024), AGX_BARRIER_ALL, ia_vertices, ia_prims, + vs_invocations, c_prims, c_inv, draw_ptr, draw.index_buffer, + agx_draw_index_range_el(draw), cmd->state.gfx.index.restart, + index_size_B, prim); } else { - libagx_increment_ia(cs, agx_1d(1), ia_vertices, ia_prims, vs_invocations, - c_prims, c_inv, draw_ptr, prim); + libagx_increment_ia(cs, agx_1d(1), 
AGX_BARRIER_ALL, ia_vertices, ia_prims, + vs_invocations, c_prims, c_inv, draw_ptr, prim); } } @@ -3476,7 +3483,7 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_) struct hk_descriptor_state *desc = &cmd->state.gfx.descriptors; libagx_draw_without_adj( - ccs, agx_1d(1), out_draw, draw.b.ptr, + ccs, agx_1d(1), AGX_BARRIER_ALL, out_draw, draw.b.ptr, desc->root.draw.input_assembly, draw.index_buffer, draw.indexed ? agx_draw_index_range_el(draw) : 0, draw.indexed ? agx_index_size_to_B(draw.index_size) : 0, prim); @@ -3503,7 +3510,7 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_) size_t size_B = libagx_draw_robust_index_vdm_size(); uint64_t target = hk_cs_alloc_for_indirect(cs, size_B); - libagx_draw_robust_index(ccs, agx_1d(32), target, + libagx_draw_robust_index(ccs, agx_1d(32), AGX_BARRIER_ALL, target, hk_geometry_state(cmd), draw.b.ptr, draw.index_buffer, draw.index_buffer_range_B, draw.restart, topology, draw.index_size); @@ -3728,8 +3735,8 @@ hk_draw_indirect_count(VkCommandBuffer commandBuffer, VkBuffer _buffer, uint64_t in = hk_buffer_address(buffer, offset); uint64_t count_addr = hk_buffer_address(count_buffer, countBufferOffset); - libagx_predicate_indirect(cs, agx_1d(maxDrawCount), patched, in, count_addr, - stride / 4, indexed); + libagx_predicate_indirect(cs, agx_1d(maxDrawCount), AGX_BARRIER_ALL, patched, + in, count_addr, stride / 4, indexed); if (indexed) { hk_draw_indexed_indirect_inner(commandBuffer, patched, maxDrawCount, @@ -3847,7 +3854,7 @@ hk_begin_end_xfb(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer, if (copies > 0) { perf_debug(dev, "XFB counter copy"); - libagx_copy_xfb_counters(cs, agx_1d(copies), + libagx_copy_xfb_counters(cs, agx_1d(copies), AGX_BARRIER_ALL, hk_pool_upload(cmd, &params, sizeof(params), 8)); } } diff --git a/src/asahi/vulkan/hk_query_pool.c b/src/asahi/vulkan/hk_query_pool.c index e0187c7ba7f..7d132ba498c 100644 --- a/src/asahi/vulkan/hk_query_pool.c +++ 
b/src/asahi/vulkan/hk_query_pool.c @@ -268,7 +268,7 @@ hk_dispatch_imm_writes(struct hk_cmd_buffer *cmd, struct hk_cs *cs) util_dynarray_num_elements(&cs->imm_writes, struct libagx_imm_write); assert(count > 0); - libagx_write_u32s(cs, agx_1d(count), params); + libagx_write_u32s(cs, agx_1d(count), AGX_BARRIER_ALL, params); } void @@ -305,7 +305,7 @@ hk_queue_write(struct hk_cmd_buffer *cmd, uint64_t address, uint32_t value, hk_cdm_cache_flush(dev, cs); perf_debug(dev, "Queued write"); - libagx_write_u32(cs, agx_1d(1), address, value); + libagx_write_u32(cs, agx_1d(1), AGX_BARRIER_ALL, address, value); } /** @@ -433,7 +433,7 @@ hk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, if (!after) return; - libagx_copy_timestamp(after, agx_1d(1), report_addr, + libagx_copy_timestamp(after, agx_1d(1), AGX_BARRIER_ALL, report_addr, cs->timestamp.end.addr); } else { cs->timestamp.end = (struct agx_timestamp_req){ @@ -702,5 +702,5 @@ hk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, .with_availability = flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT, }; - libagx_copy_query_struct(cs, agx_1d(queryCount), info); + libagx_copy_query_struct(cs, agx_1d(queryCount), AGX_BARRIER_ALL, info); } diff --git a/src/compiler/nir/nir_precompiled.h b/src/compiler/nir/nir_precompiled.h index 48d34c81fa5..01eed53fbd3 100644 --- a/src/compiler/nir/nir_precompiled.h +++ b/src/compiler/nir/nir_precompiled.h @@ -104,14 +104,15 @@ * implement that mechanism, a driver must implement the following function * signature: * - * MESA_DISPATCH_PRECOMP(context, grid, kernel index, argument pointer, - * size of arguments) + * MESA_DISPATCH_PRECOMP(context, grid, barrier, kernel index, + * argument pointer, size of arguments) * * The exact types used are determined by the driver. context is something like - * a Vulkan command buffer. grid represents the 3D dispatch size. kernel index - * is the index of the precompiled kernel (nir_precomp_index). 
argument pointer - * is a host pointer to the sized argument structure, which the driver must - * upload and bind (e.g. as push constants). + * a Vulkan command buffer. grid represents the 3D dispatch size. barrier + * describes the synchronization and cache flushing required before and after + * the dispatch. kernel index is the index of the precompiled kernel + * (nir_precomp_index). argument pointer is a host pointer to the sized argument + * structure, which the driver must upload and bind (e.g. as push constants). * * Because the types are ambiguous here, the same mechanism works for both * Gallium and Vulkan drivers. @@ -479,7 +480,7 @@ nir_precomp_print_dispatch_macros(FILE *fp, const struct nir_precomp_opts *opt, for (unsigned i = 0; i < 2; ++i) { bool is_struct = i == 0; - fprintf(fp, "#define %s%s(_context, _grid%s", func->name, + fprintf(fp, "#define %s%s(_context, _grid, _barrier%s", func->name, is_struct ? "_struct" : "", is_struct ? ", _data" : ""); /* Add the arguments, including variant parameters. For struct macros, @@ -523,7 +524,7 @@ nir_precomp_print_dispatch_macros(FILE *fp, const struct nir_precomp_opts *opt, /* Dispatch via MESA_DISPATCH_PRECOMP, which the driver must #define * suitably before #include-ing this file. 
*/ - fprintf(fp, " MESA_DISPATCH_PRECOMP(_context, _grid, "); + fprintf(fp, " MESA_DISPATCH_PRECOMP(_context, _grid, _barrier, "); nir_precomp_print_enum_value(fp, func); nir_precomp_print_variant_params(fp, func, false); fprintf(fp, ", &_args, sizeof(_args)); \\\n"); diff --git a/src/gallium/drivers/asahi/agx_query.c b/src/gallium/drivers/asahi/agx_query.c index c15cc6f226c..2a8ff5c1c6b 100644 --- a/src/gallium/drivers/asahi/agx_query.c +++ b/src/gallium/drivers/asahi/agx_query.c @@ -15,6 +15,7 @@ #include "agx_device.h" #include "agx_state.h" #include "libagx.h" +#include "libagx_dgc.h" #include "libagx_shaders.h" static bool @@ -500,7 +501,7 @@ agx_get_query_result_resource_gpu(struct agx_context *ctx, : copy_type == QUERY_COPY_BOOL32 ? 4 : 0; - libagx_copy_query_gl(batch, agx_1d(1), query->ptr.gpu, + libagx_copy_query_gl(batch, agx_1d(1), AGX_BARRIER_ALL, query->ptr.gpu, rsrc->bo->va->addr + offset, result_type, bool_size); return true; } diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 6fa06b66c4c..c0d4f0ccc70 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -3103,7 +3103,8 @@ agx_launch_internal(struct agx_batch *batch, struct agx_grid grid, void agx_launch_precomp(struct agx_batch *batch, struct agx_grid grid, - enum libagx_program program, void *args, size_t arg_size) + enum agx_barrier barrier, enum libagx_program program, + void *args, size_t arg_size) { struct agx_device *dev = agx_device(batch->ctx->base.screen); struct agx_precompiled_shader *cs = @@ -3935,14 +3936,15 @@ agx_ia_update(struct agx_batch *batch, const struct pipe_draw_info *info, perf_debug(dev, "Input assembly counters with primitive restart"); libagx_increment_ia_restart( - batch, agx_1d(1024), ia_vertices, ia_primitives, vs_invocations, - c_prims, c_invs, draw, ib, ib_range_el, info->restart_index, - info->index_size, info->mode); + batch, agx_1d(1024), AGX_BARRIER_ALL, ia_vertices, 
ia_primitives, + vs_invocations, c_prims, c_invs, draw, ib, ib_range_el, + info->restart_index, info->index_size, info->mode); } else { perf_debug(dev, "Input assembly counters"); - libagx_increment_ia(batch, agx_1d(1), ia_vertices, ia_primitives, - vs_invocations, c_prims, c_invs, draw, info->mode); + libagx_increment_ia(batch, agx_1d(1), AGX_BARRIER_ALL, ia_vertices, + ia_primitives, vs_invocations, c_prims, c_invs, draw, + info->mode); } } @@ -4146,7 +4148,7 @@ agx_launch_gs_prerast(struct agx_batch *batch, .prim = info->mode, }; - libagx_gs_setup_indirect_struct(batch, agx_1d(1), gsi); + libagx_gs_setup_indirect_struct(batch, agx_1d(1), AGX_BARRIER_ALL, gsi); wg = agx_workgroup(1, 1, 1); grid_vs = @@ -4172,7 +4174,8 @@ agx_launch_gs_prerast(struct agx_batch *batch, agx_launch(batch, grid_gs, wg, gs->gs_count, NULL, PIPE_SHADER_GEOMETRY, 0); - libagx_prefix_sum_geom(batch, agx_1d(1024 * gs->gs_count_words), gp); + libagx_prefix_sum_geom(batch, agx_1d(1024 * gs->gs_count_words), + AGX_BARRIER_ALL, gp); } /* Pre-GS shader */ @@ -4243,9 +4246,9 @@ agx_draw_without_restart(struct agx_batch *batch, }; /* Unroll the index buffer for each draw */ - libagx_unroll_restart_struct(batch, agx_1d(1024 * indirect->draw_count), - unroll, util_logbase2(info->index_size), - libagx_compact_prim(info->mode)); + libagx_unroll_restart_struct( + batch, agx_1d(1024 * indirect->draw_count), AGX_BARRIER_ALL, unroll, + util_logbase2(info->index_size), libagx_compact_prim(info->mode)); /* Now draw the results without restart */ struct pipe_draw_info new_info = { @@ -4675,10 +4678,10 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info, uint64_t grids = agx_pool_alloc_aligned(&batch->pool, grid_stride * 3, 4).gpu; - libagx_tess_setup_indirect(batch, agx_1d(1), state, grids, - 0 /* XXX: IA */, indirect_ptr, vertex_out_ptr, - 0, 0, 0 /* XXX: Index buffer */, - ctx->vs->b.info.outputs, tcs_statistic); + libagx_tess_setup_indirect( + batch, agx_1d(1), AGX_BARRIER_ALL, 
state, grids, 0 /* XXX: IA */, + indirect_ptr, vertex_out_ptr, 0, 0, 0 /* XXX: Index buffer */, + ctx->vs->b.info.outputs, tcs_statistic); batch->uniforms.vertex_output_buffer_ptr = vertex_out_ptr; @@ -4698,10 +4701,11 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info, batch->uniforms.vertex_output_buffer_ptr = 0; /* Generate counts, then prefix sum them, then finally tessellate. */ - libagx_tessellate(batch, tess_grid, mode, LIBAGX_TESS_MODE_COUNT, state); - libagx_prefix_sum_tess(batch, agx_1d(1024), state); - libagx_tessellate(batch, tess_grid, mode, LIBAGX_TESS_MODE_WITH_COUNTS, - state); + libagx_tessellate(batch, tess_grid, AGX_BARRIER_ALL, mode, + LIBAGX_TESS_MODE_COUNT, state); + libagx_prefix_sum_tess(batch, agx_1d(1024), AGX_BARRIER_ALL, state); + libagx_tessellate(batch, tess_grid, AGX_BARRIER_ALL, mode, + LIBAGX_TESS_MODE_WITH_COUNTS, state); /* Face culling state needs to be specialized for tess */ ctx->dirty |= AGX_DIRTY_RS; @@ -5307,7 +5311,8 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) if (indirect) { uint64_t addr = agx_get_query_address(batch, statistic); - libagx_increment_cs_invocations(batch, agx_1d(1), indirect, addr, + libagx_increment_cs_invocations(batch, agx_1d(1), AGX_BARRIER_ALL, + indirect, addr, agx_workgroup_threads(wg)); } else { agx_query_increment_cpu(ctx, statistic, @@ -5435,7 +5440,8 @@ agx_decompress_inplace(struct agx_batch *batch, struct pipe_surface *surf, ail_metadata_height_tl(layout, level), surf->u.tex.last_layer - surf->u.tex.first_layer + 1); - libagx_decompress(batch, grid, layout, surf->u.tex.first_layer, level, + libagx_decompress(batch, grid, AGX_BARRIER_ALL, layout, + surf->u.tex.first_layer, level, agx_map_texture_gpu(rsrc, 0), images.gpu); } diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 1a70ab79261..628c0e84e76 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ 
b/src/gallium/drivers/asahi/agx_state.h @@ -796,8 +796,8 @@ void agx_launch(struct agx_batch *batch, struct agx_grid grid, unsigned variable_shared_mem); void agx_launch_precomp(struct agx_batch *batch, struct agx_grid grid, - enum libagx_program program, void *args, - size_t arg_size); + enum agx_barrier barrier, enum libagx_program program, + void *args, size_t arg_size); #define MESA_DISPATCH_PRECOMP agx_launch_precomp