nir,asahi,hk: add barrier argument to MESA_DISPATCH_PRECOMP

In the current API, precomp implicitly assumes full barriers both before & after
every dispatch. That's not good for performance. However, dropping the barriers
and requiring user to explicitly call barrier functions before/after would have
bad ergonomics.

So, we add a new parameter to the standard MESA_DISPATCH_PRECOMP signature
representing the barriers required around the dispatch. As usual, the actual
type & semantics are left to drivers to define what makes sense for their
hardware. We just reserve the place for it. (I think most drivers will want
bitflags here, but I don't think the actual flags are worth standardizing. If a
driver wanted to use a struct here, that would work too.)

Since the asahi stack doesn't do anything clever with barriers yet, we
mechanically add an AGX_BARRIER_ALL barrier to all precomp users in-tree. We can
optimize that later, this just gets the flag-day change in with no functional
change.

For JM panfrost, this will provide a convenient place to stash both their "job
barrier" bit and their "suppress prefetch" bit (which is really a sort of
barrier / cache flush, if you think about it).

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32980>
This commit is contained in:
Alyssa Rosenzweig 2025-01-10 13:19:22 -05:00 committed by Marge Bot
parent 4955a68a03
commit 401b400de3
10 changed files with 86 additions and 68 deletions

View file

@ -260,20 +260,21 @@ agx_fill_decompress_args(struct ail_layout *layout, unsigned layer,
} }
#undef libagx_decompress #undef libagx_decompress
#define libagx_decompress(context, grid, layout, layer, level, ptr, images) \ #define libagx_decompress(context, grid, barrier, layout, layer, level, ptr, \
images) \
libagx_decompress_struct( \ libagx_decompress_struct( \
context, grid, \ context, grid, barrier, \
agx_fill_decompress_args(layout, layer, level, ptr, images), \ agx_fill_decompress_args(layout, layer, level, ptr, images), \
util_logbase2(layout->sample_count_sa)) util_logbase2(layout->sample_count_sa))
#define libagx_tessellate(context, grid, prim, mode, state) \ #define libagx_tessellate(context, grid, barrier, prim, mode, state) \
if (prim == TESS_PRIMITIVE_QUADS) { \ if (prim == TESS_PRIMITIVE_QUADS) { \
libagx_tess_quad(context, grid, state, mode); \ libagx_tess_quad(context, grid, barrier, state, mode); \
} else if (prim == TESS_PRIMITIVE_TRIANGLES) { \ } else if (prim == TESS_PRIMITIVE_TRIANGLES) { \
libagx_tess_tri(context, grid, state, mode); \ libagx_tess_tri(context, grid, barrier, state, mode); \
} else { \ } else { \
assert(prim == TESS_PRIMITIVE_ISOLINES); \ assert(prim == TESS_PRIMITIVE_ISOLINES); \
libagx_tess_isoline(context, grid, state, mode); \ libagx_tess_isoline(context, grid, barrier, state, mode); \
} }
struct agx_border_packed; struct agx_border_packed;

View file

@ -709,7 +709,8 @@ hk_upload_usc_words(struct hk_cmd_buffer *cmd, struct hk_shader *s,
void void
hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid grid, hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid grid,
enum libagx_program idx, void *data, size_t data_size) enum agx_barrier barrier, enum libagx_program idx,
void *data, size_t data_size)
{ {
struct hk_device *dev = hk_cmd_buffer_device(cs->cmd); struct hk_device *dev = hk_cmd_buffer_device(cs->cmd);
struct agx_precompiled_shader *prog = agx_get_precompiled(&dev->bg_eot, idx); struct agx_precompiled_shader *prog = agx_get_precompiled(&dev->bg_eot, idx);

View file

@ -803,8 +803,9 @@ hk_dispatch_with_local_size(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
hk_dispatch_with_usc(dev, cs, &s->b.info, usc, grid, local_size); hk_dispatch_with_usc(dev, cs, &s->b.info, usc, grid, local_size);
} }
void hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid gird, void hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid grid,
enum libagx_program idx, void *data, size_t data_size); enum agx_barrier barrier, enum libagx_program idx,
void *data, size_t data_size);
#define MESA_DISPATCH_PRECOMP hk_dispatch_precomp #define MESA_DISPATCH_PRECOMP hk_dispatch_precomp

View file

@ -103,8 +103,8 @@ dispatch(struct hk_cmd_buffer *cmd, struct agx_grid grid)
perf_debug(dev, "CS invocation statistic"); perf_debug(dev, "CS invocation statistic");
uint64_t grid = cmd->state.cs.descriptors.root.cs.group_count_addr; uint64_t grid = cmd->state.cs.descriptors.root.cs.group_count_addr;
libagx_increment_cs_invocations(cs, agx_1d(1), grid, stat, libagx_increment_cs_invocations(cs, agx_1d(1), grid, AGX_BARRIER_ALL,
agx_workgroup_threads(local_size)); stat, agx_workgroup_threads(local_size));
} }
hk_ensure_cs_has_space(cmd, cs, 0x2000 /* TODO */); hk_ensure_cs_has_space(cmd, cs, 0x2000 /* TODO */);

View file

@ -861,7 +861,8 @@ hk_CmdBeginRendering(VkCommandBuffer commandBuffer,
agx_3d(ail_metadata_width_tl(layout, level) * 32, agx_3d(ail_metadata_width_tl(layout, level) * 32,
ail_metadata_height_tl(layout, level), layer_count); ail_metadata_height_tl(layout, level), layer_count);
libagx_decompress(cs, grid, layout, layer, level, base, libagx_decompress(cs, grid, AGX_BARRIER_ALL, layout, layer,
level, base,
hk_pool_upload(cmd, &imgs, sizeof(imgs), 64)); hk_pool_upload(cmd, &imgs, sizeof(imgs), 64));
} }
} }
@ -1389,8 +1390,8 @@ hk_draw_without_restart(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
.zero_sink = dev->rodata.zero_sink, .zero_sink = dev->rodata.zero_sink,
}; };
libagx_unroll_restart_struct(cs, agx_1d(1024 * draw_count), ia, libagx_unroll_restart_struct(cs, agx_1d(1024 * draw_count), AGX_BARRIER_ALL,
draw.index_size, libagx_compact_prim(prim)); ia, draw.index_size, libagx_compact_prim(prim));
return agx_draw_indexed_indirect(ia.out_draw, dev->heap->va->addr, return agx_draw_indexed_indirect(ia.out_draw, dev->heap->va->addr,
dev->heap->size, draw.index_size, dev->heap->size, draw.index_size,
@ -1460,7 +1461,7 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
gsi.index_buffer_range_el = agx_draw_index_range_el(draw); gsi.index_buffer_range_el = agx_draw_index_range_el(draw);
} }
libagx_gs_setup_indirect_struct(cs, agx_1d(1), gsi); libagx_gs_setup_indirect_struct(cs, agx_1d(1), AGX_BARRIER_ALL, gsi);
grid_vs = agx_grid_indirect( grid_vs = agx_grid_indirect(
geometry_params + offsetof(struct agx_geometry_params, vs_grid)); geometry_params + offsetof(struct agx_geometry_params, vs_grid));
@ -1486,7 +1487,8 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
hk_dispatch_with_local_size(cmd, cs, count, grid_gs, hk_dispatch_with_local_size(cmd, cs, count, grid_gs,
agx_workgroup(1, 1, 1)); agx_workgroup(1, 1, 1));
libagx_prefix_sum_geom(cs, agx_1d(1024 * count_words), geometry_params); libagx_prefix_sum_geom(cs, agx_1d(1024 * count_words), AGX_BARRIER_ALL,
geometry_params);
} }
/* Pre-GS shader */ /* Pre-GS shader */
@ -1549,7 +1551,7 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
args.in_index_buffer_range_el = agx_draw_index_range_el(draw); args.in_index_buffer_range_el = agx_draw_index_range_el(draw);
} }
libagx_tess_setup_indirect_struct(cs, agx_1d(1), args); libagx_tess_setup_indirect_struct(cs, agx_1d(1), AGX_BARRIER_ALL, args);
uint32_t grid_stride = sizeof(uint32_t) * 6; uint32_t grid_stride = sizeof(uint32_t) * 6;
grid_vs = agx_grid_indirect_local(gfx->tess.grids + 0 * grid_stride); grid_vs = agx_grid_indirect_local(gfx->tess.grids + 0 * grid_stride);
@ -1565,7 +1567,8 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
/* TCS invocation counter increments once per-patch */ /* TCS invocation counter increments once per-patch */
if (tcs_stat) { if (tcs_stat) {
perf_debug(dev, "Direct TCS statistic"); perf_debug(dev, "Direct TCS statistic");
libagx_increment_statistic(cs, agx_1d(1), tcs_stat, patches); libagx_increment_statistic(cs, agx_1d(1), AGX_BARRIER_ALL, tcs_stat,
patches);
} }
} }
@ -1583,10 +1586,13 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
grid_tcs, agx_workgroup(tcs->info.tess.tcs_output_patch_size, 1, 1)); grid_tcs, agx_workgroup(tcs->info.tess.tcs_output_patch_size, 1, 1));
/* First generate counts, then prefix sum them, and then tessellate. */ /* First generate counts, then prefix sum them, and then tessellate. */
libagx_tessellate(cs, grid_tess, info.mode, LIBAGX_TESS_MODE_COUNT, state); libagx_tessellate(cs, grid_tess, AGX_BARRIER_ALL, info.mode,
libagx_prefix_sum_tess(cs, agx_1d(1024), state); LIBAGX_TESS_MODE_COUNT, state);
libagx_tessellate(cs, grid_tess, info.mode, LIBAGX_TESS_MODE_WITH_COUNTS,
state); libagx_prefix_sum_tess(cs, agx_1d(1024), AGX_BARRIER_ALL, state);
libagx_tessellate(cs, grid_tess, AGX_BARRIER_ALL, info.mode,
LIBAGX_TESS_MODE_WITH_COUNTS, state);
return agx_draw_indexed_indirect(gfx->tess.out_draws, dev->heap->va->addr, return agx_draw_indexed_indirect(gfx->tess.out_draws, dev->heap->va->addr,
dev->heap->size, AGX_INDEX_SIZE_U32, false); dev->heap->size, AGX_INDEX_SIZE_U32, false);
@ -3358,12 +3364,13 @@ hk_ia_update(struct hk_cmd_buffer *cmd, struct hk_cs *cs, struct agx_draw draw,
uint32_t index_size_B = agx_index_size_to_B(draw.index_size); uint32_t index_size_B = agx_index_size_to_B(draw.index_size);
libagx_increment_ia_restart( libagx_increment_ia_restart(
cs, agx_1d(1024), ia_vertices, ia_prims, vs_invocations, c_prims, cs, agx_1d(1024), AGX_BARRIER_ALL, ia_vertices, ia_prims,
c_inv, draw_ptr, draw.index_buffer, agx_draw_index_range_el(draw), vs_invocations, c_prims, c_inv, draw_ptr, draw.index_buffer,
cmd->state.gfx.index.restart, index_size_B, prim); agx_draw_index_range_el(draw), cmd->state.gfx.index.restart,
index_size_B, prim);
} else { } else {
libagx_increment_ia(cs, agx_1d(1), ia_vertices, ia_prims, vs_invocations, libagx_increment_ia(cs, agx_1d(1), AGX_BARRIER_ALL, ia_vertices, ia_prims,
c_prims, c_inv, draw_ptr, prim); vs_invocations, c_prims, c_inv, draw_ptr, prim);
} }
} }
@ -3476,7 +3483,7 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
struct hk_descriptor_state *desc = &cmd->state.gfx.descriptors; struct hk_descriptor_state *desc = &cmd->state.gfx.descriptors;
libagx_draw_without_adj( libagx_draw_without_adj(
ccs, agx_1d(1), out_draw, draw.b.ptr, ccs, agx_1d(1), AGX_BARRIER_ALL, out_draw, draw.b.ptr,
desc->root.draw.input_assembly, draw.index_buffer, desc->root.draw.input_assembly, draw.index_buffer,
draw.indexed ? agx_draw_index_range_el(draw) : 0, draw.indexed ? agx_draw_index_range_el(draw) : 0,
draw.indexed ? agx_index_size_to_B(draw.index_size) : 0, prim); draw.indexed ? agx_index_size_to_B(draw.index_size) : 0, prim);
@ -3503,7 +3510,7 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
size_t size_B = libagx_draw_robust_index_vdm_size(); size_t size_B = libagx_draw_robust_index_vdm_size();
uint64_t target = hk_cs_alloc_for_indirect(cs, size_B); uint64_t target = hk_cs_alloc_for_indirect(cs, size_B);
libagx_draw_robust_index(ccs, agx_1d(32), target, libagx_draw_robust_index(ccs, agx_1d(32), AGX_BARRIER_ALL, target,
hk_geometry_state(cmd), draw.b.ptr, hk_geometry_state(cmd), draw.b.ptr,
draw.index_buffer, draw.index_buffer_range_B, draw.index_buffer, draw.index_buffer_range_B,
draw.restart, topology, draw.index_size); draw.restart, topology, draw.index_size);
@ -3728,8 +3735,8 @@ hk_draw_indirect_count(VkCommandBuffer commandBuffer, VkBuffer _buffer,
uint64_t in = hk_buffer_address(buffer, offset); uint64_t in = hk_buffer_address(buffer, offset);
uint64_t count_addr = hk_buffer_address(count_buffer, countBufferOffset); uint64_t count_addr = hk_buffer_address(count_buffer, countBufferOffset);
libagx_predicate_indirect(cs, agx_1d(maxDrawCount), patched, in, count_addr, libagx_predicate_indirect(cs, agx_1d(maxDrawCount), AGX_BARRIER_ALL, patched,
stride / 4, indexed); in, count_addr, stride / 4, indexed);
if (indexed) { if (indexed) {
hk_draw_indexed_indirect_inner(commandBuffer, patched, maxDrawCount, hk_draw_indexed_indirect_inner(commandBuffer, patched, maxDrawCount,
@ -3847,7 +3854,7 @@ hk_begin_end_xfb(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer,
if (copies > 0) { if (copies > 0) {
perf_debug(dev, "XFB counter copy"); perf_debug(dev, "XFB counter copy");
libagx_copy_xfb_counters(cs, agx_1d(copies), libagx_copy_xfb_counters(cs, agx_1d(copies), AGX_BARRIER_ALL,
hk_pool_upload(cmd, &params, sizeof(params), 8)); hk_pool_upload(cmd, &params, sizeof(params), 8));
} }
} }

View file

@ -268,7 +268,7 @@ hk_dispatch_imm_writes(struct hk_cmd_buffer *cmd, struct hk_cs *cs)
util_dynarray_num_elements(&cs->imm_writes, struct libagx_imm_write); util_dynarray_num_elements(&cs->imm_writes, struct libagx_imm_write);
assert(count > 0); assert(count > 0);
libagx_write_u32s(cs, agx_1d(count), params); libagx_write_u32s(cs, agx_1d(count), AGX_BARRIER_ALL, params);
} }
void void
@ -305,7 +305,7 @@ hk_queue_write(struct hk_cmd_buffer *cmd, uint64_t address, uint32_t value,
hk_cdm_cache_flush(dev, cs); hk_cdm_cache_flush(dev, cs);
perf_debug(dev, "Queued write"); perf_debug(dev, "Queued write");
libagx_write_u32(cs, agx_1d(1), address, value); libagx_write_u32(cs, agx_1d(1), AGX_BARRIER_ALL, address, value);
} }
/** /**
@ -433,7 +433,7 @@ hk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
if (!after) if (!after)
return; return;
libagx_copy_timestamp(after, agx_1d(1), report_addr, libagx_copy_timestamp(after, agx_1d(1), AGX_BARRIER_ALL, report_addr,
cs->timestamp.end.addr); cs->timestamp.end.addr);
} else { } else {
cs->timestamp.end = (struct agx_timestamp_req){ cs->timestamp.end = (struct agx_timestamp_req){
@ -702,5 +702,5 @@ hk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
.with_availability = flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT, .with_availability = flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT,
}; };
libagx_copy_query_struct(cs, agx_1d(queryCount), info); libagx_copy_query_struct(cs, agx_1d(queryCount), AGX_BARRIER_ALL, info);
} }

View file

@ -104,14 +104,15 @@
* implement that mechanism, a driver must implement the following function * implement that mechanism, a driver must implement the following function
* signature: * signature:
* *
* MESA_DISPATCH_PRECOMP(context, grid, kernel index, argument pointer, * MESA_DISPATCH_PRECOMP(context, grid, barrier, kernel index,
* size of arguments) * argument pointer, size of arguments)
* *
* The exact types used are determined by the driver. context is something like * The exact types used are determined by the driver. context is something like
* a Vulkan command buffer. grid represents the 3D dispatch size. kernel index * a Vulkan command buffer. grid represents the 3D dispatch size. barrier
* is the index of the precompiled kernel (nir_precomp_index). argument pointer * describes the synchronization and cache flushing required before and after
* is a host pointer to the sized argument structure, which the driver must * the dispatch. kernel index is the index of the precompiled kernel
* upload and bind (e.g. as push constants). * (nir_precomp_index). argument pointer is a host pointer to the sized argument
* structure, which the driver must upload and bind (e.g. as push constants).
* *
* Because the types are ambiguous here, the same mechanism works for both * Because the types are ambiguous here, the same mechanism works for both
* Gallium and Vulkan drivers. * Gallium and Vulkan drivers.
@ -479,7 +480,7 @@ nir_precomp_print_dispatch_macros(FILE *fp, const struct nir_precomp_opts *opt,
for (unsigned i = 0; i < 2; ++i) { for (unsigned i = 0; i < 2; ++i) {
bool is_struct = i == 0; bool is_struct = i == 0;
fprintf(fp, "#define %s%s(_context, _grid%s", func->name, fprintf(fp, "#define %s%s(_context, _grid, _barrier%s", func->name,
is_struct ? "_struct" : "", is_struct ? ", _data" : ""); is_struct ? "_struct" : "", is_struct ? ", _data" : "");
/* Add the arguments, including variant parameters. For struct macros, /* Add the arguments, including variant parameters. For struct macros,
@ -523,7 +524,7 @@ nir_precomp_print_dispatch_macros(FILE *fp, const struct nir_precomp_opts *opt,
/* Dispatch via MESA_DISPATCH_PRECOMP, which the driver must #define /* Dispatch via MESA_DISPATCH_PRECOMP, which the driver must #define
* suitably before #include-ing this file. * suitably before #include-ing this file.
*/ */
fprintf(fp, " MESA_DISPATCH_PRECOMP(_context, _grid, "); fprintf(fp, " MESA_DISPATCH_PRECOMP(_context, _grid, _barrier, ");
nir_precomp_print_enum_value(fp, func); nir_precomp_print_enum_value(fp, func);
nir_precomp_print_variant_params(fp, func, false); nir_precomp_print_variant_params(fp, func, false);
fprintf(fp, ", &_args, sizeof(_args)); \\\n"); fprintf(fp, ", &_args, sizeof(_args)); \\\n");

View file

@ -15,6 +15,7 @@
#include "agx_device.h" #include "agx_device.h"
#include "agx_state.h" #include "agx_state.h"
#include "libagx.h" #include "libagx.h"
#include "libagx_dgc.h"
#include "libagx_shaders.h" #include "libagx_shaders.h"
static bool static bool
@ -500,7 +501,7 @@ agx_get_query_result_resource_gpu(struct agx_context *ctx,
: copy_type == QUERY_COPY_BOOL32 ? 4 : copy_type == QUERY_COPY_BOOL32 ? 4
: 0; : 0;
libagx_copy_query_gl(batch, agx_1d(1), query->ptr.gpu, libagx_copy_query_gl(batch, agx_1d(1), AGX_BARRIER_ALL, query->ptr.gpu,
rsrc->bo->va->addr + offset, result_type, bool_size); rsrc->bo->va->addr + offset, result_type, bool_size);
return true; return true;
} }

View file

@ -3103,7 +3103,8 @@ agx_launch_internal(struct agx_batch *batch, struct agx_grid grid,
void void
agx_launch_precomp(struct agx_batch *batch, struct agx_grid grid, agx_launch_precomp(struct agx_batch *batch, struct agx_grid grid,
enum libagx_program program, void *args, size_t arg_size) enum agx_barrier barrier, enum libagx_program program,
void *args, size_t arg_size)
{ {
struct agx_device *dev = agx_device(batch->ctx->base.screen); struct agx_device *dev = agx_device(batch->ctx->base.screen);
struct agx_precompiled_shader *cs = struct agx_precompiled_shader *cs =
@ -3935,14 +3936,15 @@ agx_ia_update(struct agx_batch *batch, const struct pipe_draw_info *info,
perf_debug(dev, "Input assembly counters with primitive restart"); perf_debug(dev, "Input assembly counters with primitive restart");
libagx_increment_ia_restart( libagx_increment_ia_restart(
batch, agx_1d(1024), ia_vertices, ia_primitives, vs_invocations, batch, agx_1d(1024), AGX_BARRIER_ALL, ia_vertices, ia_primitives,
c_prims, c_invs, draw, ib, ib_range_el, info->restart_index, vs_invocations, c_prims, c_invs, draw, ib, ib_range_el,
info->index_size, info->mode); info->restart_index, info->index_size, info->mode);
} else { } else {
perf_debug(dev, "Input assembly counters"); perf_debug(dev, "Input assembly counters");
libagx_increment_ia(batch, agx_1d(1), ia_vertices, ia_primitives, libagx_increment_ia(batch, agx_1d(1), AGX_BARRIER_ALL, ia_vertices,
vs_invocations, c_prims, c_invs, draw, info->mode); ia_primitives, vs_invocations, c_prims, c_invs, draw,
info->mode);
} }
} }
@ -4146,7 +4148,7 @@ agx_launch_gs_prerast(struct agx_batch *batch,
.prim = info->mode, .prim = info->mode,
}; };
libagx_gs_setup_indirect_struct(batch, agx_1d(1), gsi); libagx_gs_setup_indirect_struct(batch, agx_1d(1), AGX_BARRIER_ALL, gsi);
wg = agx_workgroup(1, 1, 1); wg = agx_workgroup(1, 1, 1);
grid_vs = grid_vs =
@ -4172,7 +4174,8 @@ agx_launch_gs_prerast(struct agx_batch *batch,
agx_launch(batch, grid_gs, wg, gs->gs_count, NULL, PIPE_SHADER_GEOMETRY, agx_launch(batch, grid_gs, wg, gs->gs_count, NULL, PIPE_SHADER_GEOMETRY,
0); 0);
libagx_prefix_sum_geom(batch, agx_1d(1024 * gs->gs_count_words), gp); libagx_prefix_sum_geom(batch, agx_1d(1024 * gs->gs_count_words),
AGX_BARRIER_ALL, gp);
} }
/* Pre-GS shader */ /* Pre-GS shader */
@ -4243,9 +4246,9 @@ agx_draw_without_restart(struct agx_batch *batch,
}; };
/* Unroll the index buffer for each draw */ /* Unroll the index buffer for each draw */
libagx_unroll_restart_struct(batch, agx_1d(1024 * indirect->draw_count), libagx_unroll_restart_struct(
unroll, util_logbase2(info->index_size), batch, agx_1d(1024 * indirect->draw_count), AGX_BARRIER_ALL, unroll,
libagx_compact_prim(info->mode)); util_logbase2(info->index_size), libagx_compact_prim(info->mode));
/* Now draw the results without restart */ /* Now draw the results without restart */
struct pipe_draw_info new_info = { struct pipe_draw_info new_info = {
@ -4675,10 +4678,10 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
uint64_t grids = uint64_t grids =
agx_pool_alloc_aligned(&batch->pool, grid_stride * 3, 4).gpu; agx_pool_alloc_aligned(&batch->pool, grid_stride * 3, 4).gpu;
libagx_tess_setup_indirect(batch, agx_1d(1), state, grids, libagx_tess_setup_indirect(
0 /* XXX: IA */, indirect_ptr, vertex_out_ptr, batch, agx_1d(1), AGX_BARRIER_ALL, state, grids, 0 /* XXX: IA */,
0, 0, 0 /* XXX: Index buffer */, indirect_ptr, vertex_out_ptr, 0, 0, 0 /* XXX: Index buffer */,
ctx->vs->b.info.outputs, tcs_statistic); ctx->vs->b.info.outputs, tcs_statistic);
batch->uniforms.vertex_output_buffer_ptr = vertex_out_ptr; batch->uniforms.vertex_output_buffer_ptr = vertex_out_ptr;
@ -4698,10 +4701,11 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
batch->uniforms.vertex_output_buffer_ptr = 0; batch->uniforms.vertex_output_buffer_ptr = 0;
/* Generate counts, then prefix sum them, then finally tessellate. */ /* Generate counts, then prefix sum them, then finally tessellate. */
libagx_tessellate(batch, tess_grid, mode, LIBAGX_TESS_MODE_COUNT, state); libagx_tessellate(batch, tess_grid, AGX_BARRIER_ALL, mode,
libagx_prefix_sum_tess(batch, agx_1d(1024), state); LIBAGX_TESS_MODE_COUNT, state);
libagx_tessellate(batch, tess_grid, mode, LIBAGX_TESS_MODE_WITH_COUNTS, libagx_prefix_sum_tess(batch, agx_1d(1024), AGX_BARRIER_ALL, state);
state); libagx_tessellate(batch, tess_grid, AGX_BARRIER_ALL, mode,
LIBAGX_TESS_MODE_WITH_COUNTS, state);
/* Face culling state needs to be specialized for tess */ /* Face culling state needs to be specialized for tess */
ctx->dirty |= AGX_DIRTY_RS; ctx->dirty |= AGX_DIRTY_RS;
@ -5307,7 +5311,8 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
if (indirect) { if (indirect) {
uint64_t addr = agx_get_query_address(batch, statistic); uint64_t addr = agx_get_query_address(batch, statistic);
libagx_increment_cs_invocations(batch, agx_1d(1), indirect, addr, libagx_increment_cs_invocations(batch, agx_1d(1), AGX_BARRIER_ALL,
indirect, addr,
agx_workgroup_threads(wg)); agx_workgroup_threads(wg));
} else { } else {
agx_query_increment_cpu(ctx, statistic, agx_query_increment_cpu(ctx, statistic,
@ -5435,7 +5440,8 @@ agx_decompress_inplace(struct agx_batch *batch, struct pipe_surface *surf,
ail_metadata_height_tl(layout, level), ail_metadata_height_tl(layout, level),
surf->u.tex.last_layer - surf->u.tex.first_layer + 1); surf->u.tex.last_layer - surf->u.tex.first_layer + 1);
libagx_decompress(batch, grid, layout, surf->u.tex.first_layer, level, libagx_decompress(batch, grid, AGX_BARRIER_ALL, layout,
surf->u.tex.first_layer, level,
agx_map_texture_gpu(rsrc, 0), images.gpu); agx_map_texture_gpu(rsrc, 0), images.gpu);
} }

View file

@ -796,8 +796,8 @@ void agx_launch(struct agx_batch *batch, struct agx_grid grid,
unsigned variable_shared_mem); unsigned variable_shared_mem);
void agx_launch_precomp(struct agx_batch *batch, struct agx_grid grid, void agx_launch_precomp(struct agx_batch *batch, struct agx_grid grid,
enum libagx_program program, void *args, enum agx_barrier barrier, enum libagx_program program,
size_t arg_size); void *args, size_t arg_size);
#define MESA_DISPATCH_PRECOMP agx_launch_precomp #define MESA_DISPATCH_PRECOMP agx_launch_precomp