mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-24 00:50:30 +01:00
panvk: Remove explicit LS waits
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34808>
This commit is contained in:
parent
c28497c355
commit
f75569734e
8 changed files with 31 additions and 81 deletions
|
|
@ -1937,7 +1937,6 @@ cs_exception_handler_end(struct cs_builder *b,
|
|||
|
||||
cs_load64_to(b, addr_reg, handler->ctx.ctx_reg,
|
||||
handler->ctx.dump_addr_offset);
|
||||
cs_wait_slot(b, handler->ctx.ls_sb_slot);
|
||||
|
||||
for (unsigned i = 0; i < num_ranges; ++i) {
|
||||
unsigned reg_count = util_bitcount(masks[i]);
|
||||
|
|
@ -1946,7 +1945,7 @@ cs_exception_handler_end(struct cs_builder *b,
|
|||
offset += reg_count * 4;
|
||||
}
|
||||
|
||||
cs_wait_slot(b, handler->ctx.ls_sb_slot);
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
|
||||
/* Now that the preamble is emitted, we can flush the instructions we have in
|
||||
|
|
@ -1959,7 +1958,6 @@ cs_exception_handler_end(struct cs_builder *b,
|
|||
|
||||
cs_load64_to(b, addr_reg, handler->ctx.ctx_reg,
|
||||
handler->ctx.dump_addr_offset);
|
||||
cs_wait_slot(b, handler->ctx.ls_sb_slot);
|
||||
|
||||
for (unsigned i = 0; i < num_ranges; ++i) {
|
||||
unsigned reg_count = util_bitcount(masks[i]);
|
||||
|
|
@ -1968,7 +1966,7 @@ cs_exception_handler_end(struct cs_builder *b,
|
|||
offset += reg_count * 4;
|
||||
}
|
||||
|
||||
cs_wait_slot(b, handler->ctx.ls_sb_slot);
|
||||
cs_flush_loads(b);
|
||||
}
|
||||
|
||||
/* Fill the rest of the buffer with NOPs. */
|
||||
|
|
@ -2005,10 +2003,9 @@ cs_trace_preamble(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
|
|||
* access. Use cs_trace_field_offset() to get an offset taking this
|
||||
* pre-increment into account. */
|
||||
cs_load64_to(b, tracebuf_addr, ctx->ctx_reg, ctx->tracebuf_addr_offset);
|
||||
cs_wait_slot(b, ctx->ls_sb_slot);
|
||||
cs_add64(b, tracebuf_addr, tracebuf_addr, trace_size);
|
||||
cs_store64(b, tracebuf_addr, ctx->ctx_reg, ctx->tracebuf_addr_offset);
|
||||
cs_wait_slot(b, ctx->ls_sb_slot);
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
|
||||
#define cs_trace_field_offset(__type, __field) \
|
||||
|
|
@ -2044,7 +2041,7 @@ cs_trace_run_fragment(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
|
|||
|
||||
cs_store(b, cs_reg_tuple(b, 40, 7), tracebuf_addr, BITFIELD_MASK(7),
|
||||
cs_trace_field_offset(run_fragment, sr));
|
||||
cs_wait_slot(b, ctx->ls_sb_slot);
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 12
|
||||
|
|
@ -2087,7 +2084,7 @@ cs_trace_run_idvs2(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
|
|||
cs_trace_field_offset(run_idvs2, sr[i]));
|
||||
cs_store(b, cs_reg_tuple(b, 64, 2), tracebuf_addr, BITFIELD_MASK(2),
|
||||
cs_trace_field_offset(run_idvs2, sr[64]));
|
||||
cs_wait_slot(b, ctx->ls_sb_slot);
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
#else
|
||||
struct cs_run_idvs_trace {
|
||||
|
|
@ -2130,7 +2127,7 @@ cs_trace_run_idvs(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
|
|||
cs_trace_field_offset(run_idvs, sr[i]));
|
||||
cs_store(b, cs_reg_tuple(b, 48, 13), tracebuf_addr, BITFIELD_MASK(13),
|
||||
cs_trace_field_offset(run_idvs, sr[48]));
|
||||
cs_wait_slot(b, ctx->ls_sb_slot);
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
@ -2166,7 +2163,7 @@ cs_trace_run_compute(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
|
|||
cs_trace_field_offset(run_compute, sr[i]));
|
||||
cs_store(b, cs_reg_tuple(b, 32, 8), tracebuf_addr, BITFIELD_MASK(8),
|
||||
cs_trace_field_offset(run_compute, sr[32]));
|
||||
cs_wait_slot(b, ctx->ls_sb_slot);
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
|
@ -2197,5 +2194,5 @@ cs_trace_run_compute_indirect(struct cs_builder *b,
|
|||
cs_trace_field_offset(run_compute, sr[i]));
|
||||
cs_store(b, cs_reg_tuple(b, 32, 8), tracebuf_addr, BITFIELD_MASK(8),
|
||||
cs_trace_field_offset(run_compute, sr[32]));
|
||||
cs_wait_slot(b, ctx->ls_sb_slot);
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -144,7 +144,6 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
|
|||
cs_move32_to(b, one, 1);
|
||||
cs_load64_to(b, debug_sync_addr, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, debug.syncobjs));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_add64(b, debug_sync_addr, debug_sync_addr,
|
||||
sizeof(struct panvk_cs_sync32) * subqueue);
|
||||
cs_load32_to(b, error, debug_sync_addr,
|
||||
|
|
@ -162,7 +161,7 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
|
|||
/* Overwrite the sync error with the first error we encountered. */
|
||||
cs_store32(b, error, debug_sync_addr,
|
||||
offsetof(struct panvk_cs_sync32, error));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -589,7 +588,6 @@ panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
|
|||
|
||||
cs_load64_to(b, sync_addr, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, syncobjs));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_add64(b, sync_addr, sync_addr, sizeof(struct panvk_cs_sync64) * i);
|
||||
cs_move64_to(b, add_val, 1);
|
||||
cs_sync64_add(b, false, MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr,
|
||||
|
|
@ -607,7 +605,6 @@ panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
|
|||
|
||||
cs_load64_to(b, sync_addr, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, syncobjs));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_add64(b, sync_addr, sync_addr, sizeof(struct panvk_cs_sync64) * j);
|
||||
|
||||
cs_add64(b, wait_val, cs_progress_seqno_reg(b, j),
|
||||
|
|
@ -628,7 +625,6 @@ panvk_per_arch(cs_pick_iter_sb)(struct panvk_cmd_buffer *cmdbuf,
|
|||
|
||||
cs_load32_to(b, iter_sb, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, iter_sb));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
cs_match(b, iter_sb, cmp_scratch) {
|
||||
#define CASE(x) \
|
||||
|
|
|
|||
|
|
@ -227,10 +227,9 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
|
|||
if (shader->info.tls_size) {
|
||||
cs_move64_to(b, cs_scratch_reg64(b, 0), cmdbuf->state.tls.desc.gpu);
|
||||
cs_load64_to(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_move64_to(b, cs_scratch_reg64(b, 0), tsd);
|
||||
cs_store64(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
|
||||
cs_update_compute_ctx(b) {
|
||||
|
|
@ -279,7 +278,6 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
|
|||
cs_scratch_reg64(b, 0), BITFIELD_MASK(3), 0);
|
||||
cs_move64_to(b, cs_scratch_reg64(b, 0),
|
||||
cmdbuf->state.compute.push_uniforms);
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
if (shader_uses_sysval(shader, compute, num_work_groups.x)) {
|
||||
cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X),
|
||||
|
|
@ -302,7 +300,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
|
|||
shader, sysval_offset(compute, num_work_groups.z)));
|
||||
}
|
||||
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_flush_stores(b);
|
||||
} else {
|
||||
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X),
|
||||
info->direct.wg_count.x);
|
||||
|
|
@ -345,7 +343,6 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
|
|||
cs_load_to(b, cs_scratch_reg_tuple(b, 0, 3), cs_subqueue_ctx_reg(b),
|
||||
BITFIELD_MASK(3),
|
||||
offsetof(struct panvk_cs_subqueue_context, syncobjs));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
cs_add64(b, sync_addr, sync_addr,
|
||||
PANVK_SUBQUEUE_COMPUTE * sizeof(struct panvk_cs_sync64));
|
||||
|
|
@ -369,7 +366,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
|
|||
|
||||
cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, iter_sb));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_flush_stores(b);
|
||||
|
||||
++cmdbuf->state.cs[PANVK_SUBQUEUE_COMPUTE].relative_sync_point;
|
||||
clear_dirty_after_dispatch(cmdbuf);
|
||||
|
|
|
|||
|
|
@ -769,7 +769,6 @@ cs_render_desc_ringbuf_reserve(struct cs_builder *b, uint32_t size)
|
|||
cs_load64_to(
|
||||
b, ringbuf_sync, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.syncobj));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
/* Wait for the other end to release memory. */
|
||||
cs_move32_to(b, sz_reg, size - 1);
|
||||
|
|
@ -793,7 +792,6 @@ cs_render_desc_ringbuf_move_ptr(struct cs_builder *b, uint32_t size,
|
|||
b, cs_scratch_reg_tuple(b, 2, 3), cs_subqueue_ctx_reg(b),
|
||||
BITFIELD_MASK(3),
|
||||
offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.ptr));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
/* Update the relative position and absolute address. */
|
||||
cs_add32(b, ptr_lo, ptr_lo, size);
|
||||
|
|
@ -813,7 +811,7 @@ cs_render_desc_ringbuf_move_ptr(struct cs_builder *b, uint32_t size,
|
|||
b, cs_scratch_reg_tuple(b, 2, 3), cs_subqueue_ctx_reg(b),
|
||||
BITFIELD_MASK(3),
|
||||
offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.ptr));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
|
|
@ -927,8 +925,6 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_move64_to(b, cs_scratch_reg64(b, 12), 0);
|
||||
cs_move64_to(b, cs_scratch_reg64(b, 14), 0);
|
||||
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
/* Take care of the tiler desc with layer_offset=0 outside of the loop. */
|
||||
cs_move32_to(b, cs_scratch_reg32(b, 4),
|
||||
MIN2(cmdbuf->state.gfx.render.layer_count - 1,
|
||||
|
|
@ -942,8 +938,6 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_store(b, cs_scratch_reg_tuple(b, 0, 16), tiler_ctx_addr,
|
||||
BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 96);
|
||||
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
uint32_t remaining_layers =
|
||||
td_count > 1
|
||||
? cmdbuf->state.gfx.render.layer_count % MAX_LAYERS_PER_TILER_DESC
|
||||
|
|
@ -970,7 +964,6 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
|
|||
BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 64);
|
||||
cs_store(b, cs_scratch_reg_tuple(b, 0, 16), tiler_ctx_addr,
|
||||
BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 96);
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
cs_update_vt_ctx(b)
|
||||
cs_add64(b, tiler_ctx_addr, tiler_ctx_addr,
|
||||
|
|
@ -1006,8 +999,6 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_store(b, cs_scratch_reg_tuple(b, 0, 16), tiler_ctx_addr,
|
||||
BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 96);
|
||||
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
cs_add32(b, cs_scratch_reg32(b, 4), cs_scratch_reg32(b, 4),
|
||||
MAX_LAYERS_PER_TILER_DESC << 8);
|
||||
|
||||
|
|
@ -1018,6 +1009,9 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
|
|||
}
|
||||
}
|
||||
|
||||
/* Flush all stores to tiler_ctx_addr. */
|
||||
cs_flush_stores(b);
|
||||
|
||||
/* Then we change the scoreboard slot used for iterators. */
|
||||
panvk_per_arch(cs_pick_iter_sb)(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
|
||||
|
||||
|
|
@ -1230,7 +1224,6 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_load64_to(b, cur_tiler, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context,
|
||||
render.desc_ringbuf.ptr));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_add64(b, dst_fbd_ptr, cur_tiler,
|
||||
pan_size(TILER_CONTEXT) * td_count);
|
||||
}
|
||||
|
|
@ -1258,16 +1251,17 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_load_to(b, cs_scratch_reg_tuple(b, 0, 16),
|
||||
pass_src_fbd_ptr, BITFIELD_MASK(16), fbd_off);
|
||||
}
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_store(b, cs_scratch_reg_tuple(b, 0, 16), pass_dst_fbd_ptr,
|
||||
BITFIELD_MASK(16), fbd_off);
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
}
|
||||
cs_add64(b, pass_src_fbd_ptr, pass_src_fbd_ptr, fbd_ir_pass_offset);
|
||||
cs_add64(b, pass_dst_fbd_ptr, pass_dst_fbd_ptr, fbd_ir_pass_offset);
|
||||
cs_add32(b, pass_count, pass_count, -1);
|
||||
}
|
||||
|
||||
/* Finish stores to pass_dst_fbd_ptr. */
|
||||
cs_flush_stores(b);
|
||||
|
||||
cs_add64(b, src_fbd_ptr, src_fbd_ptr, fbd_sz);
|
||||
cs_update_frag_ctx(b)
|
||||
cs_add64(b, dst_fbd_ptr, dst_fbd_ptr, fbd_sz);
|
||||
|
|
@ -1608,15 +1602,13 @@ wrap_prev_oq(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_load64_to(
|
||||
b, prev_oq_node_reg, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, render.oq_chain));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_store64(b, prev_oq_node_reg, oq_node_reg,
|
||||
offsetof(struct panvk_cs_occlusion_query, next));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
}
|
||||
|
||||
cs_store64(b, oq_node_reg, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, render.oq_chain));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_flush_stores(b);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
@ -2260,9 +2252,6 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
|||
cs_move32_to(b, max_draw_count, draw->indirect.draw_count);
|
||||
cs_move64_to(b, draw_params_addr, draw->indirect.count_buffer_dev_addr);
|
||||
cs_load32_to(b, draw_count, draw_params_addr, 0);
|
||||
|
||||
/* wait for draw_count to load from buffer */
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_umin32(b, draw_count, draw_count, max_draw_count);
|
||||
} else {
|
||||
cs_move32_to(b, draw_count, draw->indirect.draw_count);
|
||||
|
|
@ -2283,9 +2272,6 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
|||
draw_params_addr, reg_mask, 0);
|
||||
}
|
||||
|
||||
/* Wait for the SR33-37 indirect buffer load. */
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
if (patch_faus) {
|
||||
if (shader_uses_sysval(vs, graphics, vs.first_vertex)) {
|
||||
cs_store32(b, cs_sr_reg32(b, IDVS, VERTEX_OFFSET), vs_fau_addr,
|
||||
|
|
@ -2298,10 +2284,6 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
|||
shader_remapped_sysval_offset(
|
||||
vs, sysval_offset(graphics, vs.base_instance)));
|
||||
}
|
||||
|
||||
/* Wait for the store using SR-37 as src to finish, so we can
|
||||
* overwrite it. */
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
}
|
||||
|
||||
if (patch_attribs != 0) {
|
||||
|
|
@ -2318,7 +2300,6 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
|||
|
||||
cs_load32_to(b, attrib_offset, vs_drv_set,
|
||||
pan_size(ATTRIBUTE) * i + (2 * sizeof(uint32_t)));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
/* Emulated immediate multiply: we walk the bits in
|
||||
* base_instance, and accumulate (stride << bit_pos) if the bit
|
||||
|
|
@ -2349,7 +2330,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
|||
|
||||
cs_store32(b, attrib_offset, vs_drv_set,
|
||||
pan_size(ATTRIBUTE) * i + (2 * sizeof(uint32_t)));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2629,7 +2610,6 @@ flush_tiling(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_load_to(b, cs_scratch_reg_tuple(b, 0, 3), cs_subqueue_ctx_reg(b),
|
||||
BITFIELD_MASK(3),
|
||||
offsetof(struct panvk_cs_subqueue_context, syncobjs));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
/* We're relying on PANVK_SUBQUEUE_VERTEX_TILER being the first queue to
|
||||
* skip an ADD operation on the syncobjs pointer. */
|
||||
|
|
@ -2660,14 +2640,13 @@ flush_tiling(struct panvk_cmd_buffer *cmdbuf)
|
|||
|
||||
cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, iter_sb));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_flush_stores(b);
|
||||
|
||||
/* Update the vertex seqno. */
|
||||
++cmdbuf->state.cs[PANVK_SUBQUEUE_VERTEX_TILER].relative_sync_point;
|
||||
} else {
|
||||
cs_load64_to(b, render_ctx, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, render));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2682,7 +2661,6 @@ wait_finish_tiling(struct panvk_cmd_buffer *cmdbuf)
|
|||
|
||||
cs_load64_to(b, vt_sync_addr, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, syncobjs));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
cs_add64(b, vt_sync_point,
|
||||
cs_progress_seqno_reg(b, PANVK_SUBQUEUE_VERTEX_TILER),
|
||||
|
|
@ -2741,7 +2719,7 @@ setup_tiler_oom_ctx(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_store32(b, layer_count, cs_subqueue_ctx_reg(b),
|
||||
TILER_OOM_CTX_FIELD_OFFSET(layer_count));
|
||||
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
|
|
@ -2828,7 +2806,6 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_load32_to(
|
||||
b, counter, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, tiler_oom_ctx.counter));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_if(b, MALI_CS_CONDITION_GREATER, counter)
|
||||
cs_update_frag_ctx(b)
|
||||
cs_add64(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
|
||||
|
|
@ -2896,8 +2873,6 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
|
|||
render.desc_ringbuf.syncobj));
|
||||
}
|
||||
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
cs_add64(b, sync_addr, sync_addr,
|
||||
PANVK_SUBQUEUE_FRAGMENT * sizeof(struct panvk_cs_sync64));
|
||||
cs_move32_to(b, tiler_count, td_count);
|
||||
|
|
@ -2909,12 +2884,10 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC)); \
|
||||
if (td_count == 1) { \
|
||||
cs_load_to(b, completed, cur_tiler, BITFIELD_MASK(4), 40); \
|
||||
cs_wait_slot(b, SB_ID(LS)); \
|
||||
cs_finish_fragment(b, true, completed_top, completed_bottom, async); \
|
||||
} else if (td_count > 1) { \
|
||||
cs_while(b, MALI_CS_CONDITION_GREATER, tiler_count) { \
|
||||
cs_load_to(b, completed, cur_tiler, BITFIELD_MASK(4), 40); \
|
||||
cs_wait_slot(b, SB_ID(LS)); \
|
||||
cs_finish_fragment(b, false, completed_top, completed_bottom, \
|
||||
async); \
|
||||
cs_update_frag_ctx(b) \
|
||||
|
|
@ -2937,20 +2910,18 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_load64_to( \
|
||||
b, oq_chain, cs_subqueue_ctx_reg(b), \
|
||||
offsetof(struct panvk_cs_subqueue_context, render.oq_chain)); \
|
||||
cs_wait_slot(b, SB_ID(LS)); \
|
||||
/* For WAR dependency on subqueue_context.render.oq_chain. */ \
|
||||
cs_flush_loads(b); \
|
||||
/* We use oq_syncobj as a placeholder to reset the oq_chain. */ \
|
||||
cs_move64_to(b, oq_syncobj, 0); \
|
||||
cs_store64( \
|
||||
b, oq_syncobj, cs_subqueue_ctx_reg(b), \
|
||||
offsetof(struct panvk_cs_subqueue_context, render.oq_chain)); \
|
||||
cs_wait_slot(b, SB_ID(LS)); \
|
||||
cs_while(b, MALI_CS_CONDITION_ALWAYS, cs_undef()) { \
|
||||
cs_load64_to(b, oq_syncobj, oq_chain, \
|
||||
offsetof(struct panvk_cs_occlusion_query, syncobj)); \
|
||||
cs_wait_slot(b, SB_ID(LS)); \
|
||||
cs_load64_to(b, oq_chain, oq_chain, \
|
||||
offsetof(struct panvk_cs_occlusion_query, next)); \
|
||||
cs_wait_slot(b, SB_ID(LS)); \
|
||||
cs_sync32_set( \
|
||||
b, true, MALI_CS_SYNC_SCOPE_CSG, add_val_lo, oq_syncobj, \
|
||||
cs_defer(SB_MASK(DEFERRED_FLUSH), SB_ID(DEFERRED_SYNC))); \
|
||||
|
|
@ -2976,7 +2947,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
|
|||
|
||||
cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, iter_sb));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_flush_stores(b);
|
||||
|
||||
/* Update the ring buffer position. */
|
||||
if (free_render_descs) {
|
||||
|
|
|
|||
|
|
@ -41,7 +41,6 @@ panvk_per_arch(CmdResetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event,
|
|||
(i * sizeof(struct panvk_cs_sync32)));
|
||||
cs_load32_to(b, seqno, sync_addr,
|
||||
offsetof(struct panvk_cs_sync32, seqno));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
cs_match(b, seqno, cmp_scratch) {
|
||||
cs_case(b, 0) {
|
||||
|
|
@ -83,7 +82,6 @@ panvk_per_arch(CmdSetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event,
|
|||
(i * sizeof(struct panvk_cs_sync32)));
|
||||
cs_load32_to(b, seqno, sync_addr,
|
||||
offsetof(struct panvk_cs_sync32, seqno));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
cs_match(b, seqno, cmp_scratch) {
|
||||
cs_case(b, 0) {
|
||||
|
|
|
|||
|
|
@ -95,10 +95,9 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
|
|||
if (shader->info.tls_size) {
|
||||
cs_move64_to(b, cs_scratch_reg64(b, 0), cmdbuf->state.tls.desc.gpu);
|
||||
cs_load64_to(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_move64_to(b, cs_scratch_reg64(b, 0), tsd);
|
||||
cs_store64(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
|
||||
cs_update_compute_ctx(b) {
|
||||
|
|
@ -156,7 +155,6 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
|
|||
cs_load_to(b, cs_scratch_reg_tuple(b, 0, 3), cs_subqueue_ctx_reg(b),
|
||||
BITFIELD_MASK(3),
|
||||
offsetof(struct panvk_cs_subqueue_context, syncobjs));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
cs_add64(b, sync_addr, sync_addr,
|
||||
PANVK_SUBQUEUE_COMPUTE * sizeof(struct panvk_cs_sync64));
|
||||
|
|
@ -180,7 +178,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
|
|||
|
||||
cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b),
|
||||
offsetof(struct panvk_cs_subqueue_context, iter_sb));
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_flush_stores(b);
|
||||
|
||||
++cmdbuf->state.cs[PANVK_SUBQUEUE_COMPUTE].relative_sync_point;
|
||||
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ panvk_cmd_reset_occlusion_queries(struct panvk_cmd_buffer *cmd,
|
|||
|
||||
/* reset_oq_batch() only does the stores, we need to flush those explicitly
|
||||
* here. */
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_flush_stores(b);
|
||||
|
||||
/* We flush the caches to make the new value visible to the CPU. */
|
||||
struct cs_index flush_id = cs_scratch_reg32(b, 0);
|
||||
|
|
@ -152,7 +152,7 @@ panvk_cmd_begin_occlusion_query(struct panvk_cmd_buffer *cmd,
|
|||
cs_move64_to(b, report_addr_gpu, report_addr);
|
||||
cs_move64_to(b, clear_value, 0);
|
||||
cs_store64(b, clear_value, report_addr_gpu, 0);
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -218,9 +218,6 @@ copy_oq_result_batch(struct cs_builder *b,
|
|||
cs_load32_to(b, avail, avail_addr, i * sizeof(struct panvk_cs_sync32));
|
||||
}
|
||||
|
||||
/* Flush the loads. */
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
for (uint32_t i = 0; i < query_count; i++) {
|
||||
struct cs_index store_src =
|
||||
cs_reg_tuple(b, scratch_regs.reg + (i * regs_per_copy), regs_per_copy);
|
||||
|
|
@ -230,7 +227,7 @@ copy_oq_result_batch(struct cs_builder *b,
|
|||
}
|
||||
|
||||
/* Flush the stores. */
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -77,7 +77,6 @@ generate_tiler_oom_handler(struct panvk_device *dev,
|
|||
* rendering has already been triggered */
|
||||
cs_load32_to(&b, counter, subqueue_ctx,
|
||||
TILER_OOM_CTX_FIELD_OFFSET(counter));
|
||||
cs_wait_slot(&b, SB_ID(LS));
|
||||
|
||||
cs_if(&b, MALI_CS_CONDITION_GREATER, counter)
|
||||
cs_load64_to(&b, fbd_ptr, subqueue_ctx,
|
||||
|
|
@ -88,7 +87,6 @@ generate_tiler_oom_handler(struct panvk_device *dev,
|
|||
|
||||
cs_load32_to(&b, layer_count, subqueue_ctx,
|
||||
TILER_OOM_CTX_FIELD_OFFSET(layer_count));
|
||||
cs_wait_slot(&b, SB_ID(LS));
|
||||
|
||||
cs_while(&b, MALI_CS_CONDITION_GREATER, layer_count) {
|
||||
cs_trace_run_fragment(&b, &tracing_ctx, cs_scratch_reg_tuple(&b, 8, 4),
|
||||
|
|
@ -109,12 +107,10 @@ generate_tiler_oom_handler(struct panvk_device *dev,
|
|||
cs_load32_to(&b, td_count, subqueue_ctx,
|
||||
TILER_OOM_CTX_FIELD_OFFSET(td_count));
|
||||
cs_move64_to(&b, zero, 0);
|
||||
cs_wait_slot(&b, SB_ID(LS));
|
||||
|
||||
cs_while(&b, MALI_CS_CONDITION_GREATER, td_count) {
|
||||
/* Load completed chunks */
|
||||
cs_load_to(&b, completed_chunks, tiler_ptr, BITFIELD_MASK(4), 10 * 4);
|
||||
cs_wait_slot(&b, SB_ID(LS));
|
||||
|
||||
cs_finish_fragment(&b, false, completed_top, completed_bottom,
|
||||
cs_now());
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue