mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-13 22:50:26 +01:00
nir,asahi,hk: add barrier argument to MESA_DISPATCH_PRECOMP
In the current API, precomp implicitly assumes full barriers both before & after every dispatch. That's not good for performance. However, dropping the barriers and requiring the user to explicitly call barrier functions before/after would have bad ergonomics. So, we add a new parameter to the standard MESA_DISPATCH_PRECOMP signature representing the barriers required around the dispatch. As usual, the actual type & semantics are left to drivers to define what makes sense for their hardware. We just reserve the place for it. (I think most drivers will want bitflags here, but I don't think the actual flags are worth standardizing. If a driver wanted to use a struct here, that would work too.) Since the asahi stack doesn't do anything clever with barriers yet, we mechanically add an AGX_BARRIER_ALL barrier to all precomp users in-tree. We can optimize that later; this just gets the flag-day change in with no functional change. For JM panfrost, this will provide a convenient place to stash both their "job barrier" bit and their "suppress prefetch" bit (which is really a sort of barrier / cache flush, if you think about it). Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32980>
This commit is contained in:
parent
4955a68a03
commit
401b400de3
10 changed files with 86 additions and 68 deletions
|
|
@ -260,20 +260,21 @@ agx_fill_decompress_args(struct ail_layout *layout, unsigned layer,
|
|||
}
|
||||
|
||||
#undef libagx_decompress
|
||||
#define libagx_decompress(context, grid, layout, layer, level, ptr, images) \
|
||||
#define libagx_decompress(context, grid, barrier, layout, layer, level, ptr, \
|
||||
images) \
|
||||
libagx_decompress_struct( \
|
||||
context, grid, \
|
||||
context, grid, barrier, \
|
||||
agx_fill_decompress_args(layout, layer, level, ptr, images), \
|
||||
util_logbase2(layout->sample_count_sa))
|
||||
|
||||
#define libagx_tessellate(context, grid, prim, mode, state) \
|
||||
#define libagx_tessellate(context, grid, barrier, prim, mode, state) \
|
||||
if (prim == TESS_PRIMITIVE_QUADS) { \
|
||||
libagx_tess_quad(context, grid, state, mode); \
|
||||
libagx_tess_quad(context, grid, barrier, state, mode); \
|
||||
} else if (prim == TESS_PRIMITIVE_TRIANGLES) { \
|
||||
libagx_tess_tri(context, grid, state, mode); \
|
||||
libagx_tess_tri(context, grid, barrier, state, mode); \
|
||||
} else { \
|
||||
assert(prim == TESS_PRIMITIVE_ISOLINES); \
|
||||
libagx_tess_isoline(context, grid, state, mode); \
|
||||
libagx_tess_isoline(context, grid, barrier, state, mode); \
|
||||
}
|
||||
|
||||
struct agx_border_packed;
|
||||
|
|
|
|||
|
|
@ -709,7 +709,8 @@ hk_upload_usc_words(struct hk_cmd_buffer *cmd, struct hk_shader *s,
|
|||
|
||||
void
|
||||
hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid grid,
|
||||
enum libagx_program idx, void *data, size_t data_size)
|
||||
enum agx_barrier barrier, enum libagx_program idx,
|
||||
void *data, size_t data_size)
|
||||
{
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cs->cmd);
|
||||
struct agx_precompiled_shader *prog = agx_get_precompiled(&dev->bg_eot, idx);
|
||||
|
|
|
|||
|
|
@ -803,8 +803,9 @@ hk_dispatch_with_local_size(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
hk_dispatch_with_usc(dev, cs, &s->b.info, usc, grid, local_size);
|
||||
}
|
||||
|
||||
void hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid gird,
|
||||
enum libagx_program idx, void *data, size_t data_size);
|
||||
void hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid grid,
|
||||
enum agx_barrier barrier, enum libagx_program idx,
|
||||
void *data, size_t data_size);
|
||||
|
||||
#define MESA_DISPATCH_PRECOMP hk_dispatch_precomp
|
||||
|
||||
|
|
|
|||
|
|
@ -103,8 +103,8 @@ dispatch(struct hk_cmd_buffer *cmd, struct agx_grid grid)
|
|||
perf_debug(dev, "CS invocation statistic");
|
||||
uint64_t grid = cmd->state.cs.descriptors.root.cs.group_count_addr;
|
||||
|
||||
libagx_increment_cs_invocations(cs, agx_1d(1), grid, stat,
|
||||
agx_workgroup_threads(local_size));
|
||||
libagx_increment_cs_invocations(cs, agx_1d(1), grid, AGX_BARRIER_ALL,
|
||||
stat, agx_workgroup_threads(local_size));
|
||||
}
|
||||
|
||||
hk_ensure_cs_has_space(cmd, cs, 0x2000 /* TODO */);
|
||||
|
|
|
|||
|
|
@ -861,7 +861,8 @@ hk_CmdBeginRendering(VkCommandBuffer commandBuffer,
|
|||
agx_3d(ail_metadata_width_tl(layout, level) * 32,
|
||||
ail_metadata_height_tl(layout, level), layer_count);
|
||||
|
||||
libagx_decompress(cs, grid, layout, layer, level, base,
|
||||
libagx_decompress(cs, grid, AGX_BARRIER_ALL, layout, layer,
|
||||
level, base,
|
||||
hk_pool_upload(cmd, &imgs, sizeof(imgs), 64));
|
||||
}
|
||||
}
|
||||
|
|
@ -1389,8 +1390,8 @@ hk_draw_without_restart(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
.zero_sink = dev->rodata.zero_sink,
|
||||
};
|
||||
|
||||
libagx_unroll_restart_struct(cs, agx_1d(1024 * draw_count), ia,
|
||||
draw.index_size, libagx_compact_prim(prim));
|
||||
libagx_unroll_restart_struct(cs, agx_1d(1024 * draw_count), AGX_BARRIER_ALL,
|
||||
ia, draw.index_size, libagx_compact_prim(prim));
|
||||
|
||||
return agx_draw_indexed_indirect(ia.out_draw, dev->heap->va->addr,
|
||||
dev->heap->size, draw.index_size,
|
||||
|
|
@ -1460,7 +1461,7 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
gsi.index_buffer_range_el = agx_draw_index_range_el(draw);
|
||||
}
|
||||
|
||||
libagx_gs_setup_indirect_struct(cs, agx_1d(1), gsi);
|
||||
libagx_gs_setup_indirect_struct(cs, agx_1d(1), AGX_BARRIER_ALL, gsi);
|
||||
|
||||
grid_vs = agx_grid_indirect(
|
||||
geometry_params + offsetof(struct agx_geometry_params, vs_grid));
|
||||
|
|
@ -1486,7 +1487,8 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
hk_dispatch_with_local_size(cmd, cs, count, grid_gs,
|
||||
agx_workgroup(1, 1, 1));
|
||||
|
||||
libagx_prefix_sum_geom(cs, agx_1d(1024 * count_words), geometry_params);
|
||||
libagx_prefix_sum_geom(cs, agx_1d(1024 * count_words), AGX_BARRIER_ALL,
|
||||
geometry_params);
|
||||
}
|
||||
|
||||
/* Pre-GS shader */
|
||||
|
|
@ -1549,7 +1551,7 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
args.in_index_buffer_range_el = agx_draw_index_range_el(draw);
|
||||
}
|
||||
|
||||
libagx_tess_setup_indirect_struct(cs, agx_1d(1), args);
|
||||
libagx_tess_setup_indirect_struct(cs, agx_1d(1), AGX_BARRIER_ALL, args);
|
||||
|
||||
uint32_t grid_stride = sizeof(uint32_t) * 6;
|
||||
grid_vs = agx_grid_indirect_local(gfx->tess.grids + 0 * grid_stride);
|
||||
|
|
@ -1565,7 +1567,8 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
/* TCS invocation counter increments once per-patch */
|
||||
if (tcs_stat) {
|
||||
perf_debug(dev, "Direct TCS statistic");
|
||||
libagx_increment_statistic(cs, agx_1d(1), tcs_stat, patches);
|
||||
libagx_increment_statistic(cs, agx_1d(1), AGX_BARRIER_ALL, tcs_stat,
|
||||
patches);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1583,10 +1586,13 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
grid_tcs, agx_workgroup(tcs->info.tess.tcs_output_patch_size, 1, 1));
|
||||
|
||||
/* First generate counts, then prefix sum them, and then tessellate. */
|
||||
libagx_tessellate(cs, grid_tess, info.mode, LIBAGX_TESS_MODE_COUNT, state);
|
||||
libagx_prefix_sum_tess(cs, agx_1d(1024), state);
|
||||
libagx_tessellate(cs, grid_tess, info.mode, LIBAGX_TESS_MODE_WITH_COUNTS,
|
||||
state);
|
||||
libagx_tessellate(cs, grid_tess, AGX_BARRIER_ALL, info.mode,
|
||||
LIBAGX_TESS_MODE_COUNT, state);
|
||||
|
||||
libagx_prefix_sum_tess(cs, agx_1d(1024), AGX_BARRIER_ALL, state);
|
||||
|
||||
libagx_tessellate(cs, grid_tess, AGX_BARRIER_ALL, info.mode,
|
||||
LIBAGX_TESS_MODE_WITH_COUNTS, state);
|
||||
|
||||
return agx_draw_indexed_indirect(gfx->tess.out_draws, dev->heap->va->addr,
|
||||
dev->heap->size, AGX_INDEX_SIZE_U32, false);
|
||||
|
|
@ -3358,12 +3364,13 @@ hk_ia_update(struct hk_cmd_buffer *cmd, struct hk_cs *cs, struct agx_draw draw,
|
|||
uint32_t index_size_B = agx_index_size_to_B(draw.index_size);
|
||||
|
||||
libagx_increment_ia_restart(
|
||||
cs, agx_1d(1024), ia_vertices, ia_prims, vs_invocations, c_prims,
|
||||
c_inv, draw_ptr, draw.index_buffer, agx_draw_index_range_el(draw),
|
||||
cmd->state.gfx.index.restart, index_size_B, prim);
|
||||
cs, agx_1d(1024), AGX_BARRIER_ALL, ia_vertices, ia_prims,
|
||||
vs_invocations, c_prims, c_inv, draw_ptr, draw.index_buffer,
|
||||
agx_draw_index_range_el(draw), cmd->state.gfx.index.restart,
|
||||
index_size_B, prim);
|
||||
} else {
|
||||
libagx_increment_ia(cs, agx_1d(1), ia_vertices, ia_prims, vs_invocations,
|
||||
c_prims, c_inv, draw_ptr, prim);
|
||||
libagx_increment_ia(cs, agx_1d(1), AGX_BARRIER_ALL, ia_vertices, ia_prims,
|
||||
vs_invocations, c_prims, c_inv, draw_ptr, prim);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3476,7 +3483,7 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
|
|||
struct hk_descriptor_state *desc = &cmd->state.gfx.descriptors;
|
||||
|
||||
libagx_draw_without_adj(
|
||||
ccs, agx_1d(1), out_draw, draw.b.ptr,
|
||||
ccs, agx_1d(1), AGX_BARRIER_ALL, out_draw, draw.b.ptr,
|
||||
desc->root.draw.input_assembly, draw.index_buffer,
|
||||
draw.indexed ? agx_draw_index_range_el(draw) : 0,
|
||||
draw.indexed ? agx_index_size_to_B(draw.index_size) : 0, prim);
|
||||
|
|
@ -3503,7 +3510,7 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
|
|||
size_t size_B = libagx_draw_robust_index_vdm_size();
|
||||
uint64_t target = hk_cs_alloc_for_indirect(cs, size_B);
|
||||
|
||||
libagx_draw_robust_index(ccs, agx_1d(32), target,
|
||||
libagx_draw_robust_index(ccs, agx_1d(32), AGX_BARRIER_ALL, target,
|
||||
hk_geometry_state(cmd), draw.b.ptr,
|
||||
draw.index_buffer, draw.index_buffer_range_B,
|
||||
draw.restart, topology, draw.index_size);
|
||||
|
|
@ -3728,8 +3735,8 @@ hk_draw_indirect_count(VkCommandBuffer commandBuffer, VkBuffer _buffer,
|
|||
uint64_t in = hk_buffer_address(buffer, offset);
|
||||
uint64_t count_addr = hk_buffer_address(count_buffer, countBufferOffset);
|
||||
|
||||
libagx_predicate_indirect(cs, agx_1d(maxDrawCount), patched, in, count_addr,
|
||||
stride / 4, indexed);
|
||||
libagx_predicate_indirect(cs, agx_1d(maxDrawCount), AGX_BARRIER_ALL, patched,
|
||||
in, count_addr, stride / 4, indexed);
|
||||
|
||||
if (indexed) {
|
||||
hk_draw_indexed_indirect_inner(commandBuffer, patched, maxDrawCount,
|
||||
|
|
@ -3847,7 +3854,7 @@ hk_begin_end_xfb(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer,
|
|||
if (copies > 0) {
|
||||
perf_debug(dev, "XFB counter copy");
|
||||
|
||||
libagx_copy_xfb_counters(cs, agx_1d(copies),
|
||||
libagx_copy_xfb_counters(cs, agx_1d(copies), AGX_BARRIER_ALL,
|
||||
hk_pool_upload(cmd, &params, sizeof(params), 8));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -268,7 +268,7 @@ hk_dispatch_imm_writes(struct hk_cmd_buffer *cmd, struct hk_cs *cs)
|
|||
util_dynarray_num_elements(&cs->imm_writes, struct libagx_imm_write);
|
||||
assert(count > 0);
|
||||
|
||||
libagx_write_u32s(cs, agx_1d(count), params);
|
||||
libagx_write_u32s(cs, agx_1d(count), AGX_BARRIER_ALL, params);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -305,7 +305,7 @@ hk_queue_write(struct hk_cmd_buffer *cmd, uint64_t address, uint32_t value,
|
|||
hk_cdm_cache_flush(dev, cs);
|
||||
|
||||
perf_debug(dev, "Queued write");
|
||||
libagx_write_u32(cs, agx_1d(1), address, value);
|
||||
libagx_write_u32(cs, agx_1d(1), AGX_BARRIER_ALL, address, value);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -433,7 +433,7 @@ hk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
|
|||
if (!after)
|
||||
return;
|
||||
|
||||
libagx_copy_timestamp(after, agx_1d(1), report_addr,
|
||||
libagx_copy_timestamp(after, agx_1d(1), AGX_BARRIER_ALL, report_addr,
|
||||
cs->timestamp.end.addr);
|
||||
} else {
|
||||
cs->timestamp.end = (struct agx_timestamp_req){
|
||||
|
|
@ -702,5 +702,5 @@ hk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
|
|||
.with_availability = flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT,
|
||||
};
|
||||
|
||||
libagx_copy_query_struct(cs, agx_1d(queryCount), info);
|
||||
libagx_copy_query_struct(cs, agx_1d(queryCount), AGX_BARRIER_ALL, info);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -104,14 +104,15 @@
|
|||
* implement that mechanism, a driver must implement the following function
|
||||
* signature:
|
||||
*
|
||||
* MESA_DISPATCH_PRECOMP(context, grid, kernel index, argument pointer,
|
||||
* size of arguments)
|
||||
* MESA_DISPATCH_PRECOMP(context, grid, barrier, kernel index,
|
||||
* argument pointer, size of arguments)
|
||||
*
|
||||
* The exact types used are determined by the driver. context is something like
|
||||
* a Vulkan command buffer. grid represents the 3D dispatch size. kernel index
|
||||
* is the index of the precompiled kernel (nir_precomp_index). argument pointer
|
||||
* is a host pointer to the sized argument structure, which the driver must
|
||||
* upload and bind (e.g. as push constants).
|
||||
* a Vulkan command buffer. grid represents the 3D dispatch size. barrier
|
||||
* describes the synchronization and cache flushing required before and after
|
||||
* the dispatch. kernel index is the index of the precompiled kernel
|
||||
* (nir_precomp_index). argument pointer is a host pointer to the sized argument
|
||||
* structure, which the driver must upload and bind (e.g. as push constants).
|
||||
*
|
||||
* Because the types are ambiguous here, the same mechanism works for both
|
||||
* Gallium and Vulkan drivers.
|
||||
|
|
@ -479,7 +480,7 @@ nir_precomp_print_dispatch_macros(FILE *fp, const struct nir_precomp_opts *opt,
|
|||
for (unsigned i = 0; i < 2; ++i) {
|
||||
bool is_struct = i == 0;
|
||||
|
||||
fprintf(fp, "#define %s%s(_context, _grid%s", func->name,
|
||||
fprintf(fp, "#define %s%s(_context, _grid, _barrier%s", func->name,
|
||||
is_struct ? "_struct" : "", is_struct ? ", _data" : "");
|
||||
|
||||
/* Add the arguments, including variant parameters. For struct macros,
|
||||
|
|
@ -523,7 +524,7 @@ nir_precomp_print_dispatch_macros(FILE *fp, const struct nir_precomp_opts *opt,
|
|||
/* Dispatch via MESA_DISPATCH_PRECOMP, which the driver must #define
|
||||
* suitably before #include-ing this file.
|
||||
*/
|
||||
fprintf(fp, " MESA_DISPATCH_PRECOMP(_context, _grid, ");
|
||||
fprintf(fp, " MESA_DISPATCH_PRECOMP(_context, _grid, _barrier, ");
|
||||
nir_precomp_print_enum_value(fp, func);
|
||||
nir_precomp_print_variant_params(fp, func, false);
|
||||
fprintf(fp, ", &_args, sizeof(_args)); \\\n");
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
#include "agx_device.h"
|
||||
#include "agx_state.h"
|
||||
#include "libagx.h"
|
||||
#include "libagx_dgc.h"
|
||||
#include "libagx_shaders.h"
|
||||
|
||||
static bool
|
||||
|
|
@ -500,7 +501,7 @@ agx_get_query_result_resource_gpu(struct agx_context *ctx,
|
|||
: copy_type == QUERY_COPY_BOOL32 ? 4
|
||||
: 0;
|
||||
|
||||
libagx_copy_query_gl(batch, agx_1d(1), query->ptr.gpu,
|
||||
libagx_copy_query_gl(batch, agx_1d(1), AGX_BARRIER_ALL, query->ptr.gpu,
|
||||
rsrc->bo->va->addr + offset, result_type, bool_size);
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3103,7 +3103,8 @@ agx_launch_internal(struct agx_batch *batch, struct agx_grid grid,
|
|||
|
||||
void
|
||||
agx_launch_precomp(struct agx_batch *batch, struct agx_grid grid,
|
||||
enum libagx_program program, void *args, size_t arg_size)
|
||||
enum agx_barrier barrier, enum libagx_program program,
|
||||
void *args, size_t arg_size)
|
||||
{
|
||||
struct agx_device *dev = agx_device(batch->ctx->base.screen);
|
||||
struct agx_precompiled_shader *cs =
|
||||
|
|
@ -3935,14 +3936,15 @@ agx_ia_update(struct agx_batch *batch, const struct pipe_draw_info *info,
|
|||
perf_debug(dev, "Input assembly counters with primitive restart");
|
||||
|
||||
libagx_increment_ia_restart(
|
||||
batch, agx_1d(1024), ia_vertices, ia_primitives, vs_invocations,
|
||||
c_prims, c_invs, draw, ib, ib_range_el, info->restart_index,
|
||||
info->index_size, info->mode);
|
||||
batch, agx_1d(1024), AGX_BARRIER_ALL, ia_vertices, ia_primitives,
|
||||
vs_invocations, c_prims, c_invs, draw, ib, ib_range_el,
|
||||
info->restart_index, info->index_size, info->mode);
|
||||
} else {
|
||||
perf_debug(dev, "Input assembly counters");
|
||||
|
||||
libagx_increment_ia(batch, agx_1d(1), ia_vertices, ia_primitives,
|
||||
vs_invocations, c_prims, c_invs, draw, info->mode);
|
||||
libagx_increment_ia(batch, agx_1d(1), AGX_BARRIER_ALL, ia_vertices,
|
||||
ia_primitives, vs_invocations, c_prims, c_invs, draw,
|
||||
info->mode);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -4146,7 +4148,7 @@ agx_launch_gs_prerast(struct agx_batch *batch,
|
|||
.prim = info->mode,
|
||||
};
|
||||
|
||||
libagx_gs_setup_indirect_struct(batch, agx_1d(1), gsi);
|
||||
libagx_gs_setup_indirect_struct(batch, agx_1d(1), AGX_BARRIER_ALL, gsi);
|
||||
|
||||
wg = agx_workgroup(1, 1, 1);
|
||||
grid_vs =
|
||||
|
|
@ -4172,7 +4174,8 @@ agx_launch_gs_prerast(struct agx_batch *batch,
|
|||
agx_launch(batch, grid_gs, wg, gs->gs_count, NULL, PIPE_SHADER_GEOMETRY,
|
||||
0);
|
||||
|
||||
libagx_prefix_sum_geom(batch, agx_1d(1024 * gs->gs_count_words), gp);
|
||||
libagx_prefix_sum_geom(batch, agx_1d(1024 * gs->gs_count_words),
|
||||
AGX_BARRIER_ALL, gp);
|
||||
}
|
||||
|
||||
/* Pre-GS shader */
|
||||
|
|
@ -4243,9 +4246,9 @@ agx_draw_without_restart(struct agx_batch *batch,
|
|||
};
|
||||
|
||||
/* Unroll the index buffer for each draw */
|
||||
libagx_unroll_restart_struct(batch, agx_1d(1024 * indirect->draw_count),
|
||||
unroll, util_logbase2(info->index_size),
|
||||
libagx_compact_prim(info->mode));
|
||||
libagx_unroll_restart_struct(
|
||||
batch, agx_1d(1024 * indirect->draw_count), AGX_BARRIER_ALL, unroll,
|
||||
util_logbase2(info->index_size), libagx_compact_prim(info->mode));
|
||||
|
||||
/* Now draw the results without restart */
|
||||
struct pipe_draw_info new_info = {
|
||||
|
|
@ -4675,10 +4678,10 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
|
|||
uint64_t grids =
|
||||
agx_pool_alloc_aligned(&batch->pool, grid_stride * 3, 4).gpu;
|
||||
|
||||
libagx_tess_setup_indirect(batch, agx_1d(1), state, grids,
|
||||
0 /* XXX: IA */, indirect_ptr, vertex_out_ptr,
|
||||
0, 0, 0 /* XXX: Index buffer */,
|
||||
ctx->vs->b.info.outputs, tcs_statistic);
|
||||
libagx_tess_setup_indirect(
|
||||
batch, agx_1d(1), AGX_BARRIER_ALL, state, grids, 0 /* XXX: IA */,
|
||||
indirect_ptr, vertex_out_ptr, 0, 0, 0 /* XXX: Index buffer */,
|
||||
ctx->vs->b.info.outputs, tcs_statistic);
|
||||
|
||||
batch->uniforms.vertex_output_buffer_ptr = vertex_out_ptr;
|
||||
|
||||
|
|
@ -4698,10 +4701,11 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
|
|||
batch->uniforms.vertex_output_buffer_ptr = 0;
|
||||
|
||||
/* Generate counts, then prefix sum them, then finally tessellate. */
|
||||
libagx_tessellate(batch, tess_grid, mode, LIBAGX_TESS_MODE_COUNT, state);
|
||||
libagx_prefix_sum_tess(batch, agx_1d(1024), state);
|
||||
libagx_tessellate(batch, tess_grid, mode, LIBAGX_TESS_MODE_WITH_COUNTS,
|
||||
state);
|
||||
libagx_tessellate(batch, tess_grid, AGX_BARRIER_ALL, mode,
|
||||
LIBAGX_TESS_MODE_COUNT, state);
|
||||
libagx_prefix_sum_tess(batch, agx_1d(1024), AGX_BARRIER_ALL, state);
|
||||
libagx_tessellate(batch, tess_grid, AGX_BARRIER_ALL, mode,
|
||||
LIBAGX_TESS_MODE_WITH_COUNTS, state);
|
||||
|
||||
/* Face culling state needs to be specialized for tess */
|
||||
ctx->dirty |= AGX_DIRTY_RS;
|
||||
|
|
@ -5307,7 +5311,8 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
|
|||
if (indirect) {
|
||||
uint64_t addr = agx_get_query_address(batch, statistic);
|
||||
|
||||
libagx_increment_cs_invocations(batch, agx_1d(1), indirect, addr,
|
||||
libagx_increment_cs_invocations(batch, agx_1d(1), AGX_BARRIER_ALL,
|
||||
indirect, addr,
|
||||
agx_workgroup_threads(wg));
|
||||
} else {
|
||||
agx_query_increment_cpu(ctx, statistic,
|
||||
|
|
@ -5435,7 +5440,8 @@ agx_decompress_inplace(struct agx_batch *batch, struct pipe_surface *surf,
|
|||
ail_metadata_height_tl(layout, level),
|
||||
surf->u.tex.last_layer - surf->u.tex.first_layer + 1);
|
||||
|
||||
libagx_decompress(batch, grid, layout, surf->u.tex.first_layer, level,
|
||||
libagx_decompress(batch, grid, AGX_BARRIER_ALL, layout,
|
||||
surf->u.tex.first_layer, level,
|
||||
agx_map_texture_gpu(rsrc, 0), images.gpu);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -796,8 +796,8 @@ void agx_launch(struct agx_batch *batch, struct agx_grid grid,
|
|||
unsigned variable_shared_mem);
|
||||
|
||||
void agx_launch_precomp(struct agx_batch *batch, struct agx_grid grid,
|
||||
enum libagx_program program, void *args,
|
||||
size_t arg_size);
|
||||
enum agx_barrier barrier, enum libagx_program program,
|
||||
void *args, size_t arg_size);
|
||||
|
||||
#define MESA_DISPATCH_PRECOMP agx_launch_precomp
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue