panfrost: Remove progress_increment from all CS builders

The progress tracking logic has been deprecated since v11 and we don't plan to use it.
Let's get rid of the progress increment logic on all instructions.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Acked-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34947>
Mary Guillemard 2025-05-13 12:56:25 +02:00 committed by Marge Bot
parent 5ba982f166
commit 53f780ec91
11 changed files with 164 additions and 182 deletions
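
The change is mechanical for every caller: the trailing progress_increment boolean, which all call sites passed as false, is dropped from the cs_* builder helpers. A minimal before/after sketch assembled from the hunks below (not verbatim from any single file):

   /* Before: run/wait helpers carried a progress_increment flag,
    * hardcoded to false at every call site. */
   cs_wait_slot(b, 0, false);
   cs_finish_tiling(b, false);
   cs_run_fragment(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER, false);

   /* After: the flag is gone; the remaining arguments are unchanged. */
   cs_wait_slot(b, 0);
   cs_finish_tiling(b);
   cs_run_fragment(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);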

View file

@@ -148,7 +148,7 @@ csf_oom_handler_init(struct panfrost_context *ctx)
    /* Use different framebuffer descriptor depending on whether incremental
     * rendering has already been triggered */
    cs_load32_to(&b, counter, tiler_oom_ctx, FIELD_OFFSET(counter));
-   cs_wait_slot(&b, 0, false);
+   cs_wait_slot(&b, 0);
    cs_if(&b, MALI_CS_CONDITION_GREATER, counter) {
       cs_load64_to(&b, cs_sr_reg64(&b, FRAGMENT, FBD_POINTER), tiler_oom_ctx,
                    FBD_OFFSET(MIDDLE));
@@ -164,12 +164,12 @@ csf_oom_handler_init(struct panfrost_context *ctx)
                 FIELD_OFFSET(bbox_max));
    cs_move64_to(&b, cs_sr_reg64(&b, FRAGMENT, TEM_POINTER), 0);
    cs_move32_to(&b, cs_sr_reg32(&b, FRAGMENT, TEM_ROW_STRIDE), 0);
-   cs_wait_slot(&b, 0, false);
+   cs_wait_slot(&b, 0);
 
    /* Run the fragment job and wait */
    cs_select_sb_entries_for_async_ops(&b, 3);
-   cs_run_fragment(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER, false);
-   cs_wait_slot(&b, 3, false);
+   cs_run_fragment(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+   cs_wait_slot(&b, 3);
 
    /* Increment counter */
    cs_add32(&b, counter, counter, 1);
@@ -177,9 +177,9 @@ csf_oom_handler_init(struct panfrost_context *ctx)
 
    /* Load completed chunks */
    cs_load64_to(&b, tiler_ctx, tiler_oom_ctx, FIELD_OFFSET(tiler_desc));
-   cs_wait_slot(&b, 0, false);
+   cs_wait_slot(&b, 0);
    cs_load_to(&b, completed_chunks, tiler_ctx, BITFIELD_MASK(4), 10 * 4);
-   cs_wait_slot(&b, 0, false);
+   cs_wait_slot(&b, 0);
 
    cs_finish_fragment(&b, false, completed_top, completed_bottom, cs_now());
@@ -195,7 +195,7 @@ csf_oom_handler_init(struct panfrost_context *ctx)
                    MALI_CS_OTHER_FLUSH_MODE_INVALIDATE, flush_id,
                    cs_defer(0, 0));
-   cs_wait_slot(&b, 0, false);
+   cs_wait_slot(&b, 0);
 
    cs_select_sb_entries_for_async_ops(&b, 2);
 }
@@ -347,7 +347,7 @@ csf_emit_batch_end(struct panfrost_batch *batch)
    struct cs_builder *b = batch->csf.cs.builder;
 
    /* Barrier to let everything finish */
-   cs_wait_slots(b, BITFIELD_MASK(8), false);
+   cs_wait_slots(b, BITFIELD_MASK(8));
 
    if (dev->debug & PAN_DBG_SYNC) {
       /* Get the CS state */
@@ -367,7 +367,7 @@ csf_emit_batch_end(struct panfrost_batch *batch)
    cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_CLEAN,
                    MALI_CS_OTHER_FLUSH_MODE_INVALIDATE, flush_id,
                    cs_defer(0, 0));
-   cs_wait_slot(b, 0, false);
+   cs_wait_slot(b, 0);
 
    /* Finish the command stream */
    if (!cs_is_valid(batch->csf.cs.builder))
@@ -821,8 +821,8 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,
    if (batch->draw_count > 0) {
       /* Finish tiling and wait for IDVS and tiling */
-      cs_finish_tiling(b, false);
-      cs_wait_slot(b, 2, false);
+      cs_finish_tiling(b);
+      cs_wait_slot(b, 2);
       cs_vt_end(b, cs_now());
    }
@@ -841,7 +841,7 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,
    if (batch->draw_count > 0) {
       struct cs_index counter = cs_reg32(b, 78);
       cs_load32_to(b, counter, cs_reg64(b, TILER_OOM_CTX_REG), 0);
-      cs_wait_slot(b, 0, false);
+      cs_wait_slot(b, 0);
       cs_if(b, MALI_CS_CONDITION_GREATER, counter) {
          cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
                       GET_FBD(oom_ctx, LAST).gpu);
@@ -849,8 +849,8 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,
    }
 
    /* Run the fragment job and wait */
-   cs_run_fragment(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER, false);
-   cs_wait_slot(b, 2, false);
+   cs_run_fragment(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+   cs_wait_slot(b, 2);
 
    /* Gather freed heap chunks and add them to the heap context free list
    * so they can be re-used next time the tiler heap runs out of chunks.
@@ -862,7 +862,7 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,
      cs_move64_to(b, cs_reg64(b, 90), batch->tiler_ctx.valhall.desc);
      cs_load_to(b, cs_reg_tuple(b, 86, 4), cs_reg64(b, 90), BITFIELD_MASK(4),
                 40);
-     cs_wait_slot(b, 0, false);
+     cs_wait_slot(b, 0);
      cs_finish_fragment(b, true, cs_reg64(b, 86), cs_reg64(b, 88), cs_now());
   }
 }
@@ -950,7 +950,7 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,
      cs_load_to(b, grid_xyz, address, BITFIELD_MASK(3), 0);
 
      /* Wait for the load */
-     cs_wait_slot(b, 0, false);
+     cs_wait_slot(b, 0);
 
      /* Copy to FAU */
      for (unsigned i = 0; i < 3; ++i) {
@@ -962,7 +962,7 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,
      }
 
      /* Wait for the stores */
-     cs_wait_slot(b, 0, false);
+     cs_wait_slot(b, 0);
 
      /* Use run_compute with a set task axis instead of run_compute_indirect as
       * run_compute_indirect has been found to cause intermittent hangs. This
@@ -973,7 +973,7 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,
       * this is somewhat offset by run_compute being a native instruction. */
      unsigned task_axis = MALI_TASK_AXIS_X;
      cs_run_compute(b, DIV_ROUND_UP(max_thread_cnt, threads_per_wg), task_axis,
-                    false, cs_shader_res_sel(0, 0, 0, 0));
+                    cs_shader_res_sel(0, 0, 0, 0));
   } else {
      /* Set size in workgroups per dimension immediately */
      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X), info->grid[0]);
@@ -1008,7 +1008,7 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,
      assert(task_axis <= MALI_TASK_AXIS_Z);
      assert(task_increment > 0);
-     cs_run_compute(b, task_increment, task_axis, false,
+     cs_run_compute(b, task_increment, task_axis,
                     cs_shader_res_sel(0, 0, 0, 0));
   }
 }
@@ -1050,10 +1050,10 @@ GENX(csf_launch_xfb)(struct panfrost_batch *batch,
    csf_emit_shader_regs(batch, PIPE_SHADER_VERTEX,
                         batch->rsd[PIPE_SHADER_VERTEX]);
 
    /* force a barrier to avoid read/write sync issues with buffers */
-   cs_wait_slot(b, 2, false);
+   cs_wait_slot(b, 2);
 
    /* XXX: Choose correctly */
-   cs_run_compute(b, 1, MALI_TASK_AXIS_Z, false, cs_shader_res_sel(0, 0, 0, 0));
+   cs_run_compute(b, 1, MALI_TASK_AXIS_Z, cs_shader_res_sel(0, 0, 0, 0));
 }
 
 static void
@@ -1364,10 +1364,10 @@ GENX(csf_launch_draw)(struct panfrost_batch *batch,
    }
 
 #if PAN_ARCH >= 12
-   cs_run_idvs2(b, flags_override, false, true, drawid,
+   cs_run_idvs2(b, flags_override, true, drawid,
                 MALI_IDVS_SHADING_MODE_EARLY);
 #else
-   cs_run_idvs(b, flags_override, false, true, cs_shader_res_sel(0, 0, 1, 0),
+   cs_run_idvs(b, flags_override, true, cs_shader_res_sel(0, 0, 1, 0),
               cs_shader_res_sel(2, 2, 2, 0), drawid);
 #endif
 }
@@ -1409,12 +1409,12 @@ GENX(csf_launch_draw_indirect)(struct panfrost_batch *batch,
      cs_move32_to(b, cs_sr_reg32(b, IDVS, INDEX_BUFFER_SIZE), 0);
   }
 
-  cs_wait_slot(b, 0, false);
+  cs_wait_slot(b, 0);
 
 #if PAN_ARCH >= 12
-  cs_run_idvs2(b, flags_override, false, true, drawid,
+  cs_run_idvs2(b, flags_override, true, drawid,
               MALI_IDVS_SHADING_MODE_EARLY);
 #else
-  cs_run_idvs(b, flags_override, false, true, cs_shader_res_sel(0, 0, 1, 0),
+  cs_run_idvs(b, flags_override, true, cs_shader_res_sel(0, 0, 1, 0),
              cs_shader_res_sel(2, 2, 2, 0), drawid);
 #endif

View file

@@ -386,7 +386,6 @@ GENX(panfrost_launch_precomp)(struct panfrost_batch *batch,
    assert(task_axis <= MALI_TASK_AXIS_Z);
    assert(task_increment > 0);
-   cs_run_compute(b, task_increment, task_axis, false,
-                  cs_shader_res_sel(0, 0, 0, 0));
+   cs_run_compute(b, task_increment, task_axis, cs_shader_res_sel(0, 0, 0, 0));
 #endif
 }

View file

@@ -1110,13 +1110,12 @@ cs_move64_to(struct cs_builder *b, struct cs_index dest, uint64_t imm)
 }
 
 static inline void
-cs_wait_slots(struct cs_builder *b, unsigned wait_mask, bool progress_inc)
+cs_wait_slots(struct cs_builder *b, unsigned wait_mask)
 {
    struct cs_load_store_tracker *ls_tracker = b->conf.ls_tracker;
 
    cs_emit(b, WAIT, I) {
       I.wait_mask = wait_mask;
-      I.progress_increment = progress_inc;
    }
 
    /* We don't do advanced tracking of cs_defer(), and assume that
@@ -1130,11 +1129,11 @@ cs_wait_slots(struct cs_builder *b, unsigned wait_mask, bool progress_inc)
 }
 
 static inline void
-cs_wait_slot(struct cs_builder *b, unsigned slot, bool progress_inc)
+cs_wait_slot(struct cs_builder *b, unsigned slot)
 {
    assert(slot < 8 && "invalid slot");
 
-   cs_wait_slots(b, BITFIELD_BIT(slot), progress_inc);
+   cs_wait_slots(b, BITFIELD_BIT(slot));
 }
 
 struct cs_shader_res_sel {
@@ -1154,13 +1153,11 @@ cs_shader_res_sel(unsigned srt, unsigned fau, unsigned spd, unsigned tsd)
 
 static inline void
 cs_run_compute(struct cs_builder *b, unsigned task_increment,
-               enum mali_task_axis task_axis, bool progress_inc,
-               struct cs_shader_res_sel res_sel)
+               enum mali_task_axis task_axis, struct cs_shader_res_sel res_sel)
 {
    cs_emit(b, RUN_COMPUTE, I) {
       I.task_increment = task_increment;
       I.task_axis = task_axis;
-      I.progress_increment = progress_inc;
       I.srt_select = res_sel.srt;
       I.spd_select = res_sel.spd;
       I.tsd_select = res_sel.tsd;
@@ -1170,12 +1167,11 @@ cs_run_compute(struct cs_builder *b, unsigned task_increment,
 
 #if PAN_ARCH == 10
 static inline void
-cs_run_tiling(struct cs_builder *b, uint32_t flags_override, bool progress_inc,
+cs_run_tiling(struct cs_builder *b, uint32_t flags_override,
               struct cs_shader_res_sel res_sel)
 {
    cs_emit(b, RUN_TILING, I) {
       I.flags_override = flags_override;
-      I.progress_increment = progress_inc;
       I.srt_select = res_sel.srt;
       I.spd_select = res_sel.spd;
       I.tsd_select = res_sel.tsd;
@@ -1186,13 +1182,12 @@ cs_run_tiling(struct cs_builder *b, uint32_t flags_override, bool progress_inc,
 
 #if PAN_ARCH >= 12
 static inline void
-cs_run_idvs2(struct cs_builder *b, uint32_t flags_override, bool progress_inc,
-             bool malloc_enable, struct cs_index draw_id,
+cs_run_idvs2(struct cs_builder *b, uint32_t flags_override, bool malloc_enable,
+             struct cs_index draw_id,
              enum mali_idvs_shading_mode vertex_shading_mode)
 {
    cs_emit(b, RUN_IDVS2, I) {
       I.flags_override = flags_override;
-      I.progress_increment = progress_inc;
       I.malloc_enable = malloc_enable;
       I.vertex_shading_mode = vertex_shading_mode;
@@ -1206,13 +1201,12 @@ cs_run_idvs2(struct cs_builder *b, uint32_t flags_override, bool progress_inc,
 }
 #else
 static inline void
-cs_run_idvs(struct cs_builder *b, uint32_t flags_override, bool progress_inc,
-            bool malloc_enable, struct cs_shader_res_sel varying_sel,
+cs_run_idvs(struct cs_builder *b, uint32_t flags_override, bool malloc_enable,
+            struct cs_shader_res_sel varying_sel,
             struct cs_shader_res_sel frag_sel, struct cs_index draw_id)
 {
    cs_emit(b, RUN_IDVS, I) {
       I.flags_override = flags_override;
-      I.progress_increment = progress_inc;
       I.malloc_enable = malloc_enable;
 
       if (draw_id.type == CS_INDEX_UNDEF) {
@@ -1242,31 +1236,29 @@ cs_run_idvs(struct cs_builder *b, uint32_t flags_override, bool progress_inc,
 
 static inline void
 cs_run_fragment(struct cs_builder *b, bool enable_tem,
-                enum mali_tile_render_order tile_order, bool progress_inc)
+                enum mali_tile_render_order tile_order)
 {
    cs_emit(b, RUN_FRAGMENT, I) {
       I.enable_tem = enable_tem;
       I.tile_order = tile_order;
-      I.progress_increment = progress_inc;
    }
 }
 
 static inline void
 cs_run_fullscreen(struct cs_builder *b, uint32_t flags_override,
-                  bool progress_inc, struct cs_index dcd)
+                  struct cs_index dcd)
 {
    cs_emit(b, RUN_FULLSCREEN, I) {
       I.flags_override = flags_override;
-      I.progress_increment = progress_inc;
       I.dcd = cs_src64(b, dcd);
    }
 }
 
 static inline void
-cs_finish_tiling(struct cs_builder *b, bool progress_inc)
+cs_finish_tiling(struct cs_builder *b)
 {
    cs_emit(b, FINISH_TILING, I)
-      I.progress_increment = progress_inc;
+      ;
 }
 
 static inline void
@@ -1596,11 +1588,10 @@ cs_progress_load(struct cs_builder *b, struct cs_index dst)
 
 static inline void
 cs_run_compute_indirect(struct cs_builder *b, unsigned wg_per_task,
-                        bool progress_inc, struct cs_shader_res_sel res_sel)
+                        struct cs_shader_res_sel res_sel)
 {
    cs_emit(b, RUN_COMPUTE_INDIRECT, I) {
       I.workgroups_per_task = wg_per_task;
-      I.progress_increment = progress_inc;
       I.srt_select = res_sel.srt;
       I.spd_select = res_sel.spd;
       I.tsd_select = res_sel.tsd;
@@ -1924,7 +1915,7 @@ cs_exception_handler_end(struct cs_builder *b,
       cs_load64_to(b, addr_reg, handler->ctx.ctx_reg,
                    handler->ctx.dump_addr_offset);
-      cs_wait_slot(b, handler->ctx.ls_sb_slot, false);
+      cs_wait_slot(b, handler->ctx.ls_sb_slot);
 
       for (unsigned i = 0; i < num_ranges; ++i) {
          unsigned reg_count = util_bitcount(masks[i]);
@@ -1933,7 +1924,7 @@ cs_exception_handler_end(struct cs_builder *b,
          offset += reg_count * 4;
       }
 
-      cs_wait_slot(b, handler->ctx.ls_sb_slot, false);
+      cs_wait_slot(b, handler->ctx.ls_sb_slot);
    }
 
    /* Now that the preamble is emitted, we can flush the instructions we have in
@@ -1946,7 +1937,7 @@ cs_exception_handler_end(struct cs_builder *b,
       cs_load64_to(b, addr_reg, handler->ctx.ctx_reg,
                    handler->ctx.dump_addr_offset);
-      cs_wait_slot(b, handler->ctx.ls_sb_slot, false);
+      cs_wait_slot(b, handler->ctx.ls_sb_slot);
 
       for (unsigned i = 0; i < num_ranges; ++i) {
         unsigned reg_count = util_bitcount(masks[i]);
@@ -1955,7 +1946,7 @@ cs_exception_handler_end(struct cs_builder *b,
         offset += reg_count * 4;
      }
 
-     cs_wait_slot(b, handler->ctx.ls_sb_slot, false);
+     cs_wait_slot(b, handler->ctx.ls_sb_slot);
   }
 
   /* Fill the rest of the buffer with NOPs. */
@@ -1992,10 +1983,10 @@ cs_trace_preamble(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
    * access. Use cs_trace_field_offset() to get an offset taking this
    * pre-increment into account. */
   cs_load64_to(b, tracebuf_addr, ctx->ctx_reg, ctx->tracebuf_addr_offset);
-  cs_wait_slot(b, ctx->ls_sb_slot, false);
+  cs_wait_slot(b, ctx->ls_sb_slot);
   cs_add64(b, tracebuf_addr, tracebuf_addr, trace_size);
   cs_store64(b, tracebuf_addr, ctx->ctx_reg, ctx->tracebuf_addr_offset);
-  cs_wait_slot(b, ctx->ls_sb_slot, false);
+  cs_wait_slot(b, ctx->ls_sb_slot);
 }
 
 #define cs_trace_field_offset(__type, __field) \
@@ -2010,10 +2001,10 @@ struct cs_run_fragment_trace {
 static inline void
 cs_trace_run_fragment(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
                       struct cs_index scratch_regs, bool enable_tem,
-                      enum mali_tile_render_order tile_order, bool progress_inc)
+                      enum mali_tile_render_order tile_order)
 {
    if (likely(!ctx->enabled)) {
-      cs_run_fragment(b, enable_tem, tile_order, progress_inc);
+      cs_run_fragment(b, enable_tem, tile_order);
       return;
    }
@@ -2026,12 +2017,12 @@ cs_trace_run_fragment(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
    /* cs_run_xx() must immediately follow cs_load_ip_to() otherwise the IP
    * won't point to the right instruction. */
   cs_load_ip_to(b, data);
-  cs_run_fragment(b, enable_tem, tile_order, progress_inc);
+  cs_run_fragment(b, enable_tem, tile_order);
 
   cs_store64(b, data, tracebuf_addr, cs_trace_field_offset(run_fragment, ip));
   cs_store(b, cs_reg_tuple(b, 40, 7), tracebuf_addr, BITFIELD_MASK(7),
           cs_trace_field_offset(run_fragment, sr));
-  cs_wait_slot(b, ctx->ls_sb_slot, false);
+  cs_wait_slot(b, ctx->ls_sb_slot);
 }
 
 #if PAN_ARCH >= 12
@@ -2045,12 +2036,11 @@ struct cs_run_idvs2_trace {
 static inline void
 cs_trace_run_idvs2(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
                    struct cs_index scratch_regs, uint32_t flags_override,
-                   bool progress_inc, bool malloc_enable,
-                   struct cs_index draw_id,
+                   bool malloc_enable, struct cs_index draw_id,
                    enum mali_idvs_shading_mode vertex_shading_mode)
 {
    if (likely(!ctx->enabled)) {
-      cs_run_idvs2(b, flags_override, progress_inc, malloc_enable, draw_id,
+      cs_run_idvs2(b, flags_override, malloc_enable, draw_id,
                    vertex_shading_mode);
       return;
    }
@@ -2063,8 +2053,7 @@ cs_trace_run_idvs2(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
    /* cs_run_xx() must immediately follow cs_load_ip_to() otherwise the IP
    * won't point to the right instruction. */
   cs_load_ip_to(b, data);
-  cs_run_idvs2(b, flags_override, progress_inc, malloc_enable, draw_id,
-               vertex_shading_mode);
+  cs_run_idvs2(b, flags_override, malloc_enable, draw_id, vertex_shading_mode);
 
   cs_store64(b, data, tracebuf_addr, cs_trace_field_offset(run_idvs2, ip));
   if (draw_id.type != CS_INDEX_UNDEF)
@@ -2076,7 +2065,7 @@ cs_trace_run_idvs2(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
              cs_trace_field_offset(run_idvs2, sr[i]));
   cs_store(b, cs_reg_tuple(b, 64, 2), tracebuf_addr, BITFIELD_MASK(2),
           cs_trace_field_offset(run_idvs2, sr[64]));
-  cs_wait_slot(b, ctx->ls_sb_slot, false);
+  cs_wait_slot(b, ctx->ls_sb_slot);
 }
 #else
 struct cs_run_idvs_trace {
@@ -2089,27 +2078,25 @@ struct cs_run_idvs_trace {
 static inline void
 cs_trace_run_idvs(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
                   struct cs_index scratch_regs, uint32_t flags_override,
-                  bool progress_inc, bool malloc_enable,
-                  struct cs_shader_res_sel varying_sel,
+                  bool malloc_enable, struct cs_shader_res_sel varying_sel,
                   struct cs_shader_res_sel frag_sel, struct cs_index draw_id)
 {
    if (likely(!ctx->enabled)) {
-      cs_run_idvs(b, flags_override, progress_inc, malloc_enable, varying_sel,
-                  frag_sel, draw_id);
+      cs_run_idvs(b, flags_override, malloc_enable, varying_sel, frag_sel,
+                  draw_id);
       return;
    }
 
    struct cs_index tracebuf_addr = cs_reg64(b, scratch_regs.reg);
    struct cs_index data = cs_reg64(b, scratch_regs.reg + 2);
 
-   cs_trace_preamble(b, ctx, scratch_regs,
-                     sizeof(struct cs_run_idvs_trace));
+   cs_trace_preamble(b, ctx, scratch_regs, sizeof(struct cs_run_idvs_trace));
 
    /* cs_run_xx() must immediately follow cs_load_ip_to() otherwise the IP
    * won't point to the right instruction. */
   cs_load_ip_to(b, data);
-  cs_run_idvs(b, flags_override, progress_inc, malloc_enable, varying_sel,
-              frag_sel, draw_id);
+  cs_run_idvs(b, flags_override, malloc_enable, varying_sel, frag_sel,
+              draw_id);
 
   cs_store64(b, data, tracebuf_addr, cs_trace_field_offset(run_idvs, ip));
   if (draw_id.type != CS_INDEX_UNDEF)
@@ -2121,7 +2108,7 @@ cs_trace_run_idvs(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
             cs_trace_field_offset(run_idvs, sr[i]));
   cs_store(b, cs_reg_tuple(b, 48, 13), tracebuf_addr, BITFIELD_MASK(13),
           cs_trace_field_offset(run_idvs, sr[48]));
-  cs_wait_slot(b, ctx->ls_sb_slot, false);
+  cs_wait_slot(b, ctx->ls_sb_slot);
 }
 #endif
 
@@ -2133,24 +2120,23 @@ struct cs_run_compute_trace {
 static inline void
 cs_trace_run_compute(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
                      struct cs_index scratch_regs, unsigned task_increment,
-                     enum mali_task_axis task_axis, bool progress_inc,
+                     enum mali_task_axis task_axis,
                      struct cs_shader_res_sel res_sel)
 {
    if (likely(!ctx->enabled)) {
-      cs_run_compute(b, task_increment, task_axis, progress_inc, res_sel);
+      cs_run_compute(b, task_increment, task_axis, res_sel);
       return;
    }
 
    struct cs_index tracebuf_addr = cs_reg64(b, scratch_regs.reg);
    struct cs_index data = cs_reg64(b, scratch_regs.reg + 2);
 
-   cs_trace_preamble(b, ctx, scratch_regs,
-                     sizeof(struct cs_run_compute_trace));
+   cs_trace_preamble(b, ctx, scratch_regs, sizeof(struct cs_run_compute_trace));
 
    /* cs_run_xx() must immediately follow cs_load_ip_to() otherwise the IP
    * won't point to the right instruction. */
   cs_load_ip_to(b, data);
-  cs_run_compute(b, task_increment, task_axis, progress_inc, res_sel);
+  cs_run_compute(b, task_increment, task_axis, res_sel);
 
   cs_store64(b, data, tracebuf_addr, cs_trace_field_offset(run_compute, ip));
   for (unsigned i = 0; i < 32; i += 16)
@@ -2158,31 +2144,30 @@ cs_trace_run_compute(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
             cs_trace_field_offset(run_compute, sr[i]));
   cs_store(b, cs_reg_tuple(b, 32, 8), tracebuf_addr, BITFIELD_MASK(8),
           cs_trace_field_offset(run_compute, sr[32]));
-  cs_wait_slot(b, ctx->ls_sb_slot, false);
+  cs_wait_slot(b, ctx->ls_sb_slot);
 }
 
 static inline void
 cs_trace_run_compute_indirect(struct cs_builder *b,
                               const struct cs_tracing_ctx *ctx,
                               struct cs_index scratch_regs,
-                              unsigned wg_per_task, bool progress_inc,
+                              unsigned wg_per_task,
                               struct cs_shader_res_sel res_sel)
 {
    if (likely(!ctx->enabled)) {
-      cs_run_compute_indirect(b, wg_per_task, progress_inc, res_sel);
+      cs_run_compute_indirect(b, wg_per_task, res_sel);
       return;
    }
 
    struct cs_index tracebuf_addr = cs_reg64(b, scratch_regs.reg);
    struct cs_index data = cs_reg64(b, scratch_regs.reg + 2);
 
-   cs_trace_preamble(b, ctx, scratch_regs,
-                     sizeof(struct cs_run_compute_trace));
+   cs_trace_preamble(b, ctx, scratch_regs, sizeof(struct cs_run_compute_trace));
 
    /* cs_run_xx() must immediately follow cs_load_ip_to() otherwise the IP
    * won't point to the right instruction. */
   cs_load_ip_to(b, data);
-  cs_run_compute_indirect(b, wg_per_task, progress_inc, res_sel);
+  cs_run_compute_indirect(b, wg_per_task, res_sel);
 
   cs_store64(b, data, tracebuf_addr, cs_trace_field_offset(run_compute, ip));
   for (unsigned i = 0; i < 32; i += 16)
@@ -2190,5 +2175,5 @@ cs_trace_run_compute_indirect(struct cs_builder *b,
             cs_trace_field_offset(run_compute, sr[i]));
   cs_store(b, cs_reg_tuple(b, 32, 8), tracebuf_addr, BITFIELD_MASK(8),
           cs_trace_field_offset(run_compute, sr[32]));
-  cs_wait_slot(b, ctx->ls_sb_slot, false);
+  cs_wait_slot(b, ctx->ls_sb_slot);
 }

View file

@@ -128,11 +128,11 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
    struct cs_index flush_id = cs_scratch_reg32(b, 0);
 
    cs_move32_to(b, flush_id, 0);
-   cs_wait_slots(b, SB_ALL_MASK, false);
+   cs_wait_slots(b, SB_ALL_MASK);
    cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_CLEAN,
                    MALI_CS_OTHER_FLUSH_MODE_NONE, flush_id,
                    cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
-   cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
+   cs_wait_slot(b, SB_ID(IMM_FLUSH));
 
    /* If we're in sync/trace more, we signal the debug object. */
    if (instance->debug_flags & (PANVK_DEBUG_SYNC | PANVK_DEBUG_TRACE)) {
@@ -144,12 +144,12 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
       cs_move32_to(b, one, 1);
       cs_load64_to(b, debug_sync_addr, cs_subqueue_ctx_reg(b),
                    offsetof(struct panvk_cs_subqueue_context, debug.syncobjs));
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
       cs_add64(b, debug_sync_addr, debug_sync_addr,
               sizeof(struct panvk_cs_sync32) * subqueue);
      cs_load32_to(b, error, debug_sync_addr,
                   offsetof(struct panvk_cs_sync32, error));
-     cs_wait_slots(b, SB_ALL_MASK, false);
+     cs_wait_slots(b, SB_ALL_MASK);
      if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
         cs_sync32_add(b, true, MALI_CS_SYNC_SCOPE_CSG, one,
                       debug_sync_addr, cs_now());
@@ -162,7 +162,7 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
         /* Overwrite the sync error with the first error we encountered. */
         cs_store32(b, error, debug_sync_addr,
                    offsetof(struct panvk_cs_sync32, error));
-        cs_wait_slot(b, SB_ID(LS), false);
+        cs_wait_slot(b, SB_ID(LS));
      }
   }
 }
@@ -566,7 +566,7 @@ panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
      struct panvk_cs_state *cs_state = &cmdbuf->state.cs[i];
 
      if (deps.src[i].wait_sb_mask)
-        cs_wait_slots(b, deps.src[i].wait_sb_mask, false);
+        cs_wait_slots(b, deps.src[i].wait_sb_mask);
 
      struct panvk_cache_flush_info cache_flush = deps.src[i].cache_flush;
      if (cache_flush.l2 != MALI_CS_FLUSH_MODE_NONE ||
@@ -577,7 +577,7 @@ panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
         cs_move32_to(b, flush_id, 0);
         cs_flush_caches(b, cache_flush.l2, cache_flush.lsc, cache_flush.others,
                         flush_id, cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
-        cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
+        cs_wait_slot(b, SB_ID(IMM_FLUSH));
      }
 
      /* If no one waits on us, there's no point signaling the sync object. */
@@ -589,7 +589,7 @@ panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
      cs_load64_to(b, sync_addr, cs_subqueue_ctx_reg(b),
                   offsetof(struct panvk_cs_subqueue_context, syncobjs));
-     cs_wait_slot(b, SB_ID(LS), false);
+     cs_wait_slot(b, SB_ID(LS));
      cs_add64(b, sync_addr, sync_addr, sizeof(struct panvk_cs_sync64) * i);
      cs_move64_to(b, add_val, 1);
      cs_sync64_add(b, false, MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr,
@@ -607,7 +607,7 @@ panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
         cs_load64_to(b, sync_addr, cs_subqueue_ctx_reg(b),
                      offsetof(struct panvk_cs_subqueue_context, syncobjs));
-        cs_wait_slot(b, SB_ID(LS), false);
+        cs_wait_slot(b, SB_ID(LS));
         cs_add64(b, sync_addr, sync_addr, sizeof(struct panvk_cs_sync64) * j);
         cs_add64(b, wait_val, cs_progress_seqno_reg(b, j),
@@ -628,12 +628,12 @@ panvk_per_arch(cs_pick_iter_sb)(struct panvk_cmd_buffer *cmdbuf,
    cs_load32_to(b, iter_sb, cs_subqueue_ctx_reg(b),
                 offsetof(struct panvk_cs_subqueue_context, iter_sb));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
 
    cs_match(b, iter_sb, cmp_scratch) {
 #define CASE(x) \
    cs_case(b, x) { \
-      cs_wait_slot(b, SB_ITER(x), false); \
+      cs_wait_slot(b, SB_ITER(x)); \
       cs_select_sb_entries_for_async_ops(b, SB_ITER(x)); \
    }

View file

@@ -227,10 +227,10 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
    if (shader->info.tls_size) {
       cs_move64_to(b, cs_scratch_reg64(b, 0), cmdbuf->state.tls.desc.gpu);
       cs_load64_to(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
       cs_move64_to(b, cs_scratch_reg64(b, 0), tsd);
       cs_store64(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
    }
 
    cs_update_compute_ctx(b) {
@@ -279,7 +279,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
                 cs_scratch_reg64(b, 0), BITFIELD_MASK(3), 0);
      cs_move64_to(b, cs_scratch_reg64(b, 0),
                   cmdbuf->state.compute.push_uniforms);
-     cs_wait_slot(b, SB_ID(LS), false);
+     cs_wait_slot(b, SB_ID(LS));
 
      if (shader_uses_sysval(shader, compute, num_work_groups.x)) {
         cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X),
@@ -302,7 +302,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
                      shader, sysval_offset(compute, num_work_groups.z)));
      }
 
-     cs_wait_slot(b, SB_ID(LS), false);
+     cs_wait_slot(b, SB_ID(LS));
   } else {
      cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X),
                   info->direct.wg_count.x);
@@ -326,7 +326,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
       * this is somewhat offset by run_compute being a native instruction. */
      unsigned task_axis = MALI_TASK_AXIS_X;
      cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                          wg_per_task, task_axis, false,
+                          wg_per_task, task_axis,
                           cs_shader_res_sel(0, 0, 0, 0));
   } else {
      unsigned task_axis = MALI_TASK_AXIS_X;
@@ -334,7 +334,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
      panvk_per_arch(calculate_task_axis_and_increment)(
         shader, phys_dev, &task_axis, &task_increment);
      cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                          task_increment, task_axis, false,
+                          task_increment, task_axis,
                           cs_shader_res_sel(0, 0, 0, 0));
   }
   cs_req_res(b, 0);
@@ -347,7 +347,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
    cs_load_to(b, cs_scratch_reg_tuple(b, 0, 3), cs_subqueue_ctx_reg(b),
               BITFIELD_MASK(3),
               offsetof(struct panvk_cs_subqueue_context, syncobjs));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
 
    cs_add64(b, sync_addr, sync_addr,
             PANVK_SUBQUEUE_COMPUTE * sizeof(struct panvk_cs_sync64));
@@ -371,7 +371,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
    cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b),
               offsetof(struct panvk_cs_subqueue_context, iter_sb));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
 
    ++cmdbuf->state.cs[PANVK_SUBQUEUE_COMPUTE].relative_sync_point;
 
    clear_dirty_after_dispatch(cmdbuf);

View file

@ -769,7 +769,7 @@ cs_render_desc_ringbuf_reserve(struct cs_builder *b, uint32_t size)
cs_load64_to( cs_load64_to(
b, ringbuf_sync, cs_subqueue_ctx_reg(b), b, ringbuf_sync, cs_subqueue_ctx_reg(b),
offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.syncobj)); offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.syncobj));
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
/* Wait for the other end to release memory. */ /* Wait for the other end to release memory. */
cs_move32_to(b, sz_reg, size - 1); cs_move32_to(b, sz_reg, size - 1);
@ -793,7 +793,7 @@ cs_render_desc_ringbuf_move_ptr(struct cs_builder *b, uint32_t size,
b, cs_scratch_reg_tuple(b, 2, 3), cs_subqueue_ctx_reg(b), b, cs_scratch_reg_tuple(b, 2, 3), cs_subqueue_ctx_reg(b),
BITFIELD_MASK(3), BITFIELD_MASK(3),
offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.ptr)); offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.ptr));
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
/* Update the relative position and absolute address. */ /* Update the relative position and absolute address. */
cs_add32(b, ptr_lo, ptr_lo, size); cs_add32(b, ptr_lo, ptr_lo, size);
@ -813,7 +813,7 @@ cs_render_desc_ringbuf_move_ptr(struct cs_builder *b, uint32_t size,
b, cs_scratch_reg_tuple(b, 2, 3), cs_subqueue_ctx_reg(b), b, cs_scratch_reg_tuple(b, 2, 3), cs_subqueue_ctx_reg(b),
BITFIELD_MASK(3), BITFIELD_MASK(3),
offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.ptr)); offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.ptr));
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
} }
static VkResult static VkResult
@ -927,7 +927,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
cs_move64_to(b, cs_scratch_reg64(b, 12), 0); cs_move64_to(b, cs_scratch_reg64(b, 12), 0);
cs_move64_to(b, cs_scratch_reg64(b, 14), 0); cs_move64_to(b, cs_scratch_reg64(b, 14), 0);
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
/* Take care of the tiler desc with layer_offset=0 outside of the loop. */ /* Take care of the tiler desc with layer_offset=0 outside of the loop. */
cs_move32_to(b, cs_scratch_reg32(b, 4), cs_move32_to(b, cs_scratch_reg32(b, 4),
@ -942,7 +942,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
cs_store(b, cs_scratch_reg_tuple(b, 0, 16), tiler_ctx_addr, cs_store(b, cs_scratch_reg_tuple(b, 0, 16), tiler_ctx_addr,
BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 96); BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 96);
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
uint32_t remaining_layers = uint32_t remaining_layers =
td_count > 1 td_count > 1
@ -970,7 +970,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 64); BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 64);
cs_store(b, cs_scratch_reg_tuple(b, 0, 16), tiler_ctx_addr, cs_store(b, cs_scratch_reg_tuple(b, 0, 16), tiler_ctx_addr,
BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 96); BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 96);
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
cs_update_vt_ctx(b) cs_update_vt_ctx(b)
cs_add64(b, tiler_ctx_addr, tiler_ctx_addr, cs_add64(b, tiler_ctx_addr, tiler_ctx_addr,
@ -1006,7 +1006,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
cs_store(b, cs_scratch_reg_tuple(b, 0, 16), tiler_ctx_addr, cs_store(b, cs_scratch_reg_tuple(b, 0, 16), tiler_ctx_addr,
BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 96); BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 96);
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
cs_add32(b, cs_scratch_reg32(b, 4), cs_scratch_reg32(b, 4), cs_add32(b, cs_scratch_reg32(b, 4), cs_scratch_reg32(b, 4),
MAX_LAYERS_PER_TILER_DESC << 8); MAX_LAYERS_PER_TILER_DESC << 8);
@ -1230,7 +1230,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
cs_load64_to(b, cur_tiler, cs_subqueue_ctx_reg(b), cs_load64_to(b, cur_tiler, cs_subqueue_ctx_reg(b),
offsetof(struct panvk_cs_subqueue_context, offsetof(struct panvk_cs_subqueue_context,
render.desc_ringbuf.ptr)); render.desc_ringbuf.ptr));
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
cs_add64(b, dst_fbd_ptr, cur_tiler, cs_add64(b, dst_fbd_ptr, cur_tiler,
pan_size(TILER_CONTEXT) * td_count); pan_size(TILER_CONTEXT) * td_count);
} }
@ -1258,10 +1258,10 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
cs_load_to(b, cs_scratch_reg_tuple(b, 0, 16), cs_load_to(b, cs_scratch_reg_tuple(b, 0, 16),
pass_src_fbd_ptr, BITFIELD_MASK(16), fbd_off); pass_src_fbd_ptr, BITFIELD_MASK(16), fbd_off);
} }
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
cs_store(b, cs_scratch_reg_tuple(b, 0, 16), pass_dst_fbd_ptr, cs_store(b, cs_scratch_reg_tuple(b, 0, 16), pass_dst_fbd_ptr,
BITFIELD_MASK(16), fbd_off); BITFIELD_MASK(16), fbd_off);
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
} }
cs_add64(b, pass_src_fbd_ptr, pass_src_fbd_ptr, fbd_ir_pass_offset); cs_add64(b, pass_src_fbd_ptr, pass_src_fbd_ptr, fbd_ir_pass_offset);
cs_add64(b, pass_dst_fbd_ptr, pass_dst_fbd_ptr, fbd_ir_pass_offset); cs_add64(b, pass_dst_fbd_ptr, pass_dst_fbd_ptr, fbd_ir_pass_offset);
@ -1608,15 +1608,15 @@ wrap_prev_oq(struct panvk_cmd_buffer *cmdbuf)
cs_load64_to( cs_load64_to(
b, prev_oq_node_reg, cs_subqueue_ctx_reg(b), b, prev_oq_node_reg, cs_subqueue_ctx_reg(b),
offsetof(struct panvk_cs_subqueue_context, render.oq_chain)); offsetof(struct panvk_cs_subqueue_context, render.oq_chain));
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
cs_store64(b, prev_oq_node_reg, oq_node_reg, cs_store64(b, prev_oq_node_reg, oq_node_reg,
offsetof(struct panvk_cs_occlusion_query, next)); offsetof(struct panvk_cs_occlusion_query, next));
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
} }
cs_store64(b, oq_node_reg, cs_subqueue_ctx_reg(b), cs_store64(b, oq_node_reg, cs_subqueue_ctx_reg(b),
offsetof(struct panvk_cs_subqueue_context, render.oq_chain)); offsetof(struct panvk_cs_subqueue_context, render.oq_chain));
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
return VK_SUCCESS; return VK_SUCCESS;
} }
@ -2084,11 +2084,11 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
cs_while(b, MALI_CS_CONDITION_GREATER, counter_reg) { cs_while(b, MALI_CS_CONDITION_GREATER, counter_reg) {
#if PAN_ARCH >= 12 #if PAN_ARCH >= 12
cs_trace_run_idvs2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4), cs_trace_run_idvs2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
flags_override.opaque[0], false, true, cs_undef(), flags_override.opaque[0], true, cs_undef(),
MALI_IDVS_SHADING_MODE_EARLY); MALI_IDVS_SHADING_MODE_EARLY);
#else #else
cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4), cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
flags_override.opaque[0], false, true, flags_override.opaque[0], true,
cs_shader_res_sel(0, 0, 1, 0), cs_shader_res_sel(0, 0, 1, 0),
cs_shader_res_sel(2, 2, 2, 0), cs_undef()); cs_shader_res_sel(2, 2, 2, 0), cs_undef());
#endif #endif
@ -2107,11 +2107,11 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
} else { } else {
#if PAN_ARCH >= 12 #if PAN_ARCH >= 12
cs_trace_run_idvs2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4), cs_trace_run_idvs2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
flags_override.opaque[0], false, true, cs_undef(), flags_override.opaque[0], true, cs_undef(),
MALI_IDVS_SHADING_MODE_EARLY); MALI_IDVS_SHADING_MODE_EARLY);
#else #else
cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4), cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
flags_override.opaque[0], false, true, flags_override.opaque[0], true,
cs_shader_res_sel(0, 0, 1, 0), cs_shader_res_sel(0, 0, 1, 0),
cs_shader_res_sel(2, 2, 2, 0), cs_undef()); cs_shader_res_sel(2, 2, 2, 0), cs_undef());
#endif #endif
@ -2264,7 +2264,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
cs_load32_to(b, draw_count, draw_params_addr, 0); cs_load32_to(b, draw_count, draw_params_addr, 0);
/* wait for draw_count to load from buffer */ /* wait for draw_count to load from buffer */
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
cs_umin32(b, draw_count, draw_count, max_draw_count); cs_umin32(b, draw_count, draw_count, max_draw_count);
} else { } else {
cs_move32_to(b, draw_count, draw->indirect.draw_count); cs_move32_to(b, draw_count, draw->indirect.draw_count);
@ -2288,7 +2288,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
} }
/* Wait for the SR33-37 indirect buffer load. */ /* Wait for the SR33-37 indirect buffer load. */
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
if (patch_faus) { if (patch_faus) {
if (shader_uses_sysval(vs, graphics, vs.first_vertex)) { if (shader_uses_sysval(vs, graphics, vs.first_vertex)) {
@ -2305,7 +2305,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
/* Wait for the store using SR-37 as src to finish, so we can /* Wait for the store using SR-37 as src to finish, so we can
* overwrite it. */ * overwrite it. */
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
} }
if (patch_attribs != 0) { if (patch_attribs != 0) {
@ -2322,7 +2322,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
cs_load32_to(b, attrib_offset, vs_drv_set, cs_load32_to(b, attrib_offset, vs_drv_set,
pan_size(ATTRIBUTE) * i + (2 * sizeof(uint32_t))); pan_size(ATTRIBUTE) * i + (2 * sizeof(uint32_t)));
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
/* Emulated immediate multiply: we walk the bits in /* Emulated immediate multiply: we walk the bits in
* base_instance, and accumulate (stride << bit_pos) if the bit * base_instance, and accumulate (stride << bit_pos) if the bit
@ -2353,7 +2353,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
cs_store32(b, attrib_offset, vs_drv_set, cs_store32(b, attrib_offset, vs_drv_set,
pan_size(ATTRIBUTE) * i + (2 * sizeof(uint32_t))); pan_size(ATTRIBUTE) * i + (2 * sizeof(uint32_t)));
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
} }
} }
} }
@ -2368,13 +2368,12 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
#if PAN_ARCH >= 12 #if PAN_ARCH >= 12
cs_trace_run_idvs2(b, tracing_ctx, tracing_scratch_regs, cs_trace_run_idvs2(b, tracing_ctx, tracing_scratch_regs,
flags_override.opaque[0], false, true, draw_id, flags_override.opaque[0], true, draw_id,
MALI_IDVS_SHADING_MODE_EARLY); MALI_IDVS_SHADING_MODE_EARLY);
#else #else
cs_trace_run_idvs(b, tracing_ctx, tracing_scratch_regs, cs_trace_run_idvs(
flags_override.opaque[0], false, true, b, tracing_ctx, tracing_scratch_regs, flags_override.opaque[0], true,
cs_shader_res_sel(0, 0, 1, 0), cs_shader_res_sel(0, 0, 1, 0), cs_shader_res_sel(2, 2, 2, 0), draw_id);
cs_shader_res_sel(2, 2, 2, 0), draw_id);
#endif #endif
cs_add32(b, draw_count, draw_count, -1); cs_add32(b, draw_count, draw_count, -1);
@ -2627,7 +2626,7 @@ flush_tiling(struct panvk_cmd_buffer *cmdbuf)
if (cmdbuf->state.gfx.render.tiler || inherits_render_ctx(cmdbuf)) { if (cmdbuf->state.gfx.render.tiler || inherits_render_ctx(cmdbuf)) {
/* Flush the tiling operations and signal the internal sync object. */ /* Flush the tiling operations and signal the internal sync object. */
cs_req_res(b, CS_TILER_RES); cs_req_res(b, CS_TILER_RES);
cs_finish_tiling(b, false); cs_finish_tiling(b);
cs_req_res(b, 0); cs_req_res(b, 0);
struct cs_index sync_addr = cs_scratch_reg64(b, 0); struct cs_index sync_addr = cs_scratch_reg64(b, 0);
@ -2638,7 +2637,7 @@ flush_tiling(struct panvk_cmd_buffer *cmdbuf)
cs_load_to(b, cs_scratch_reg_tuple(b, 0, 3), cs_subqueue_ctx_reg(b), cs_load_to(b, cs_scratch_reg_tuple(b, 0, 3), cs_subqueue_ctx_reg(b),
BITFIELD_MASK(3), BITFIELD_MASK(3),
offsetof(struct panvk_cs_subqueue_context, syncobjs)); offsetof(struct panvk_cs_subqueue_context, syncobjs));
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
/* We're relying on PANVK_SUBQUEUE_VERTEX_TILER being the first queue to /* We're relying on PANVK_SUBQUEUE_VERTEX_TILER being the first queue to
* skip an ADD operation on the syncobjs pointer. */ * skip an ADD operation on the syncobjs pointer. */
@ -2669,14 +2668,14 @@ flush_tiling(struct panvk_cmd_buffer *cmdbuf)
cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b), cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b),
offsetof(struct panvk_cs_subqueue_context, iter_sb)); offsetof(struct panvk_cs_subqueue_context, iter_sb));
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
/* Update the vertex seqno. */ /* Update the vertex seqno. */
++cmdbuf->state.cs[PANVK_SUBQUEUE_VERTEX_TILER].relative_sync_point; ++cmdbuf->state.cs[PANVK_SUBQUEUE_VERTEX_TILER].relative_sync_point;
} else { } else {
cs_load64_to(b, render_ctx, cs_subqueue_ctx_reg(b), cs_load64_to(b, render_ctx, cs_subqueue_ctx_reg(b),
offsetof(struct panvk_cs_subqueue_context, render)); offsetof(struct panvk_cs_subqueue_context, render));
cs_wait_slot(b, SB_ID(LS), false); cs_wait_slot(b, SB_ID(LS));
} }
} }
@ -2691,7 +2690,7 @@ wait_finish_tiling(struct panvk_cmd_buffer *cmdbuf)
cs_load64_to(b, vt_sync_addr, cs_subqueue_ctx_reg(b), cs_load64_to(b, vt_sync_addr, cs_subqueue_ctx_reg(b),
                offsetof(struct panvk_cs_subqueue_context, syncobjs));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
    cs_add64(b, vt_sync_point,
             cs_progress_seqno_reg(b, PANVK_SUBQUEUE_VERTEX_TILER),
@@ -2750,7 +2749,7 @@ setup_tiler_oom_ctx(struct panvk_cmd_buffer *cmdbuf)
    cs_store32(b, layer_count, cs_subqueue_ctx_reg(b),
               TILER_OOM_CTX_FIELD_OFFSET(layer_count));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
 }

 static VkResult
@@ -2837,7 +2836,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
    cs_load32_to(
       b, counter, cs_subqueue_ctx_reg(b),
       offsetof(struct panvk_cs_subqueue_context, tiler_oom_ctx.counter));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
    cs_if(b, MALI_CS_CONDITION_GREATER, counter)
       cs_update_frag_ctx(b)
          cs_add64(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
@@ -2854,7 +2853,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
       cs_flush_caches(b, MALI_CS_FLUSH_MODE_NONE, MALI_CS_FLUSH_MODE_NONE,
                       MALI_CS_OTHER_FLUSH_MODE_INVALIDATE, length_reg,
                       cs_defer(0x0, SB_ID(IMM_FLUSH)));
-      cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
+      cs_wait_slot(b, SB_ID(IMM_FLUSH));
    }

    cs_req_res(b, CS_FRAG_RES);
@@ -2864,7 +2863,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
       cs_move32_to(b, layer_count, calc_enabled_layer_count(cmdbuf));
       cs_while(b, MALI_CS_CONDITION_GREATER, layer_count) {
          cs_trace_run_fragment(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                               false, MALI_TILE_RENDER_ORDER_Z_ORDER, false);
+                               false, MALI_TILE_RENDER_ORDER_Z_ORDER);
          cs_add32(b, layer_count, layer_count, -1);
          cs_update_frag_ctx(b)
@@ -2873,7 +2872,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
       }
    } else {
       cs_trace_run_fragment(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                            false, MALI_TILE_RENDER_ORDER_Z_ORDER, false);
+                            false, MALI_TILE_RENDER_ORDER_Z_ORDER);
    }
    cs_req_res(b, 0);
@@ -2907,7 +2906,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
                             render.desc_ringbuf.syncobj));
    }
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
    cs_add64(b, sync_addr, sync_addr,
             PANVK_SUBQUEUE_FRAGMENT * sizeof(struct panvk_cs_sync64));
@@ -2920,12 +2919,12 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
       cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC));                     \
    if (td_count == 1) {                                                    \
       cs_load_to(b, completed, cur_tiler, BITFIELD_MASK(4), 40);           \
-      cs_wait_slot(b, SB_ID(LS), false);                                   \
+      cs_wait_slot(b, SB_ID(LS));                                          \
       cs_finish_fragment(b, true, completed_top, completed_bottom, async); \
    } else if (td_count > 1) {                                              \
       cs_while(b, MALI_CS_CONDITION_GREATER, tiler_count) {                \
          cs_load_to(b, completed, cur_tiler, BITFIELD_MASK(4), 40);        \
-         cs_wait_slot(b, SB_ID(LS), false);                                \
+         cs_wait_slot(b, SB_ID(LS));                                       \
          cs_finish_fragment(b, false, completed_top, completed_bottom,     \
                             async);                                        \
          cs_update_frag_ctx(b)                                             \
@@ -2948,20 +2947,20 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
       cs_load64_to(                                                        \
          b, oq_chain, cs_subqueue_ctx_reg(b),                              \
          offsetof(struct panvk_cs_subqueue_context, render.oq_chain));     \
-      cs_wait_slot(b, SB_ID(LS), false);                                   \
+      cs_wait_slot(b, SB_ID(LS));                                          \
       /* We use oq_syncobj as a placeholder to reset the oq_chain. */      \
       cs_move64_to(b, oq_syncobj, 0);                                      \
       cs_store64(                                                          \
          b, oq_syncobj, cs_subqueue_ctx_reg(b),                            \
          offsetof(struct panvk_cs_subqueue_context, render.oq_chain));     \
-      cs_wait_slot(b, SB_ID(LS), false);                                   \
+      cs_wait_slot(b, SB_ID(LS));                                          \
       cs_while(b, MALI_CS_CONDITION_ALWAYS, cs_undef()) {                  \
          cs_load64_to(b, oq_syncobj, oq_chain,                             \
                       offsetof(struct panvk_cs_occlusion_query, syncobj)); \
-         cs_wait_slot(b, SB_ID(LS), false);                                \
+         cs_wait_slot(b, SB_ID(LS));                                       \
          cs_load64_to(b, oq_chain, oq_chain,                               \
                       offsetof(struct panvk_cs_occlusion_query, next));    \
-         cs_wait_slot(b, SB_ID(LS), false);                                \
+         cs_wait_slot(b, SB_ID(LS));                                       \
          cs_sync32_set(                                                    \
             b, true, MALI_CS_SYNC_SCOPE_CSG, add_val_lo, oq_syncobj,       \
             cs_defer(SB_MASK(DEFERRED_FLUSH), SB_ID(DEFERRED_SYNC)));      \
@@ -2987,7 +2986,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
    cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b),
               offsetof(struct panvk_cs_subqueue_context, iter_sb));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    /* Update the ring buffer position. */
    if (free_render_descs) {
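Every hunk in this section makes the same mechanical change: the trailing progress_increment boolean disappears from the wait helpers (and from the run/trace-run wrappers). For reference, a minimal sketch of what the single-slot wait plausibly reduces to once the flag is gone — assuming cs_wait_slot() remains a thin wrapper over the mask-based cs_wait_slots(), which is how the call sites above use the pair; BITFIELD_BIT() is the stock Mesa bit helper:

    /* Hypothetical post-series wrapper: a single-slot wait is just a mask
     * wait on that slot's bit; no progress increment is encoded anymore. */
    static inline void
    cs_wait_slot(struct cs_builder *b, unsigned slot)
    {
       cs_wait_slots(b, BITFIELD_BIT(slot));
    }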


@@ -41,7 +41,7 @@ panvk_per_arch(CmdResetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event,
                   (i * sizeof(struct panvk_cs_sync32)));
       cs_load32_to(b, seqno, sync_addr,
                    offsetof(struct panvk_cs_sync32, seqno));
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
       cs_match(b, seqno, cmp_scratch) {
          cs_case(b, 0) {
@@ -83,7 +83,7 @@ panvk_per_arch(CmdSetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event,
                   (i * sizeof(struct panvk_cs_sync32)));
       cs_load32_to(b, seqno, sync_addr,
                    offsetof(struct panvk_cs_sync32, seqno));
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
       cs_match(b, seqno, cmp_scratch) {
          cs_case(b, 0) {
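Both hunks are the builder's basic load-then-consume idiom: loads retire asynchronously on the LS scoreboard slot, so the CS must wait on that slot before the loaded register is read. In isolation, with the simplified API (seqno, sync_addr and cmp_scratch stand in for whatever registers the caller allocated):

    /* Sketch: fetch a 32-bit seqno, wait for the load/store slot to clear,
     * then dispatch on the value. */
    cs_load32_to(b, seqno, sync_addr, offsetof(struct panvk_cs_sync32, seqno));
    cs_wait_slot(b, SB_ID(LS));   /* the load has landed; seqno is valid */
    cs_match(b, seqno, cmp_scratch) {
       cs_case(b, 0) {
          /* ... handle the not-yet-signalled state ... */
       }
    }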


@@ -95,10 +95,10 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
    if (shader->info.tls_size) {
       cs_move64_to(b, cs_scratch_reg64(b, 0), cmdbuf->state.tls.desc.gpu);
       cs_load64_to(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
       cs_move64_to(b, cs_scratch_reg64(b, 0), tsd);
       cs_store64(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
    }

    cs_update_compute_ctx(b) {
@@ -146,7 +146,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
    panvk_per_arch(calculate_task_axis_and_increment)(
       shader, phys_dev, &task_axis, &task_increment);
    cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                        task_increment, task_axis, false,
+                        task_increment, task_axis,
                         cs_shader_res_sel(0, 0, 0, 0));
    cs_req_res(b, 0);
@@ -158,7 +158,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
    cs_load_to(b, cs_scratch_reg_tuple(b, 0, 3), cs_subqueue_ctx_reg(b),
               BITFIELD_MASK(3),
               offsetof(struct panvk_cs_subqueue_context, syncobjs));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
    cs_add64(b, sync_addr, sync_addr,
             PANVK_SUBQUEUE_COMPUTE * sizeof(struct panvk_cs_sync64));
@@ -182,7 +182,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
    cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b),
               offsetof(struct panvk_cs_subqueue_context, iter_sb));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    ++cmdbuf->state.cs[PANVK_SUBQUEUE_COMPUTE].relative_sync_point;
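The tls_size hunk above is a register-mediated copy of a single 64-bit descriptor word: load it from one descriptor, wait on LS, store it into the other, then wait again before the scratch registers are reused. The same shape in isolation, with hypothetical src_desc/dst_desc addresses standing in for the TLS descriptor and tsd:

    /* Sketch: copy the 64-bit word at offset 8 from src_desc to dst_desc,
     * staging it in scratch registers. */
    cs_move64_to(b, cs_scratch_reg64(b, 0), src_desc);
    cs_load64_to(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
    cs_wait_slot(b, SB_ID(LS));   /* word is now in scratch regs 2-3 */
    cs_move64_to(b, cs_scratch_reg64(b, 0), dst_desc);
    cs_store64(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
    cs_wait_slot(b, SB_ID(LS));   /* store done before scratch is reused */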


@@ -93,7 +93,7 @@ panvk_cmd_reset_occlusion_queries(struct panvk_cmd_buffer *cmd,
    /* Wait on deferred sync to ensure all prior query operations have
     * completed
     */
-   cs_wait_slot(b, SB_ID(DEFERRED_SYNC), false);
+   cs_wait_slot(b, SB_ID(DEFERRED_SYNC));

    struct cs_index addr = cs_scratch_reg64(b, 16);
    struct cs_index zero_regs = cs_scratch_reg_tuple(b, 0, 16);
@@ -113,7 +113,7 @@ panvk_cmd_reset_occlusion_queries(struct panvk_cmd_buffer *cmd,
    /* reset_oq_batch() only does the stores, we need to flush those explicitly
     * here. */
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    /* We flush the caches to make the new value visible to the CPU. */
    struct cs_index flush_id = cs_scratch_reg32(b, 0);
@@ -121,7 +121,7 @@ panvk_cmd_reset_occlusion_queries(struct panvk_cmd_buffer *cmd,
    cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_CLEAN,
                    MALI_CS_OTHER_FLUSH_MODE_NONE, flush_id,
                    cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
-   cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
+   cs_wait_slot(b, SB_ID(IMM_FLUSH));
 }

 static void
@@ -152,7 +152,7 @@ panvk_cmd_begin_occlusion_query(struct panvk_cmd_buffer *cmd,
    cs_move64_to(b, report_addr_gpu, report_addr);
    cs_move64_to(b, clear_value, 0);
    cs_store64(b, clear_value, report_addr_gpu, 0);
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
 }

 static void
@@ -219,7 +219,7 @@ copy_oq_result_batch(struct cs_builder *b,
    }

    /* Flush the loads. */
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    for (uint32_t i = 0; i < query_count; i++) {
       struct cs_index store_src =
@@ -230,7 +230,7 @@ copy_oq_result_batch(struct cs_builder *b,
    }

    /* Flush the stores. */
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
 }
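The reset path above also shows the flush-then-wait idiom for CPU visibility: the cache flush signals the IMM_FLUSH scoreboard slot when it completes, and the wait blocks on that slot. The two calls, lifted out of the hunk as a standalone sketch:

    /* Make prior stores visible to the CPU: clean the caches, defer the
     * completion signal to SB_ID(IMM_FLUSH), then wait on that slot. */
    struct cs_index flush_id = cs_scratch_reg32(b, 0);
    cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_CLEAN,
                    MALI_CS_OTHER_FLUSH_MODE_NONE, flush_id,
                    cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
    cs_wait_slot(b, SB_ID(IMM_FLUSH));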
 static void
@@ -245,7 +245,7 @@ panvk_copy_occlusion_query_results(struct panvk_cmd_buffer *cmd,
    /* Wait for occlusion query syncobjs to be signalled. */
    if (flags & VK_QUERY_RESULT_WAIT_BIT)
-      cs_wait_slot(b, SB_ID(DEFERRED_SYNC), false);
+      cs_wait_slot(b, SB_ID(DEFERRED_SYNC));

    uint32_t res_size = (flags & VK_QUERY_RESULT_64_BIT) ? 2 : 1;
    uint32_t regs_per_copy =


@@ -76,7 +76,7 @@ generate_tiler_oom_handler(struct panvk_device *dev,
     * rendering has already been triggered */
    cs_load32_to(&b, counter, subqueue_ctx,
                 TILER_OOM_CTX_FIELD_OFFSET(counter));
-   cs_wait_slot(&b, SB_ID(LS), false);
+   cs_wait_slot(&b, SB_ID(LS));
    cs_if(&b, MALI_CS_CONDITION_GREATER, counter)
       cs_load64_to(&b, fbd_ptr, subqueue_ctx,
@@ -87,19 +87,18 @@ generate_tiler_oom_handler(struct panvk_device *dev,
    cs_load32_to(&b, layer_count, subqueue_ctx,
                 TILER_OOM_CTX_FIELD_OFFSET(layer_count));
-   cs_wait_slot(&b, SB_ID(LS), false);
+   cs_wait_slot(&b, SB_ID(LS));

    cs_req_res(&b, CS_FRAG_RES);
    cs_while(&b, MALI_CS_CONDITION_GREATER, layer_count) {
-      cs_trace_run_fragment(&b, &tracing_ctx,
-                            cs_scratch_reg_tuple(&b, 8, 4), false,
-                            MALI_TILE_RENDER_ORDER_Z_ORDER, false);
+      cs_trace_run_fragment(&b, &tracing_ctx, cs_scratch_reg_tuple(&b, 8, 4),
+                            false, MALI_TILE_RENDER_ORDER_Z_ORDER);
       cs_add32(&b, layer_count, layer_count, -1);
       cs_add64(&b, fbd_ptr, fbd_ptr, fbd_size);
    }
    cs_req_res(&b, 0);

    /* Wait for all iter scoreboards for simplicity. */
-   cs_wait_slots(&b, SB_ALL_ITERS_MASK, false);
+   cs_wait_slots(&b, SB_ALL_ITERS_MASK);

    /* Increment counter */
    cs_add32(&b, counter, counter, 1);
@@ -111,12 +110,12 @@ generate_tiler_oom_handler(struct panvk_device *dev,
    cs_load32_to(&b, td_count, subqueue_ctx,
                 TILER_OOM_CTX_FIELD_OFFSET(td_count));
    cs_move64_to(&b, zero, 0);
-   cs_wait_slot(&b, SB_ID(LS), false);
+   cs_wait_slot(&b, SB_ID(LS));

    cs_while(&b, MALI_CS_CONDITION_GREATER, td_count) {
       /* Load completed chunks */
       cs_load_to(&b, completed_chunks, tiler_ptr, BITFIELD_MASK(4), 10 * 4);
-      cs_wait_slot(&b, SB_ID(LS), false);
+      cs_wait_slot(&b, SB_ID(LS));

       cs_finish_fragment(&b, false, completed_top, completed_bottom,
                          cs_now());
@@ -136,7 +135,7 @@ generate_tiler_oom_handler(struct panvk_device *dev,
                    MALI_CS_OTHER_FLUSH_MODE_INVALIDATE, flush_id,
                    cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
-   cs_wait_slot(&b, SB_ID(IMM_FLUSH), false);
+   cs_wait_slot(&b, SB_ID(IMM_FLUSH));
 }

 assert(cs_is_valid(&b));
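Stripped of the OOM bookkeeping, the handler's replay loop is one fragment job per layer, with the FBD pointer stepped by fbd_size between iterations (the per-layer FBDs are presumably laid out contiguously for a single stride to suffice). A sketch using the registers named in the hunk:

    /* Sketch: replay incremental rendering layer by layer. */
    cs_req_res(&b, CS_FRAG_RES);
    cs_while(&b, MALI_CS_CONDITION_GREATER, layer_count) {
       cs_trace_run_fragment(&b, &tracing_ctx, cs_scratch_reg_tuple(&b, 8, 4),
                             false, MALI_TILE_RENDER_ORDER_Z_ORDER);
       cs_add32(&b, layer_count, layer_count, -1);   /* next layer */
       cs_add64(&b, fbd_ptr, fbd_ptr, fbd_size);     /* next FBD */
    }
    cs_req_res(&b, 0);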


@@ -31,7 +31,7 @@ cmd_copy_data(struct cs_builder *b, uint64_t dst_addr, uint64_t src_addr,
    assert((dst_addr | src_addr | size) % sizeof(uint32_t) == 0);

    /* wait for timestamp writes */
-   cs_wait_slot(b, SB_ID(DEFERRED_SYNC), false);
+   cs_wait_slot(b, SB_ID(DEFERRED_SYNC));

    /* Depending on where this is called from, we could potentially use SR
     * registers or copy with a compute job.
@@ -52,7 +52,7 @@ cmd_copy_data(struct cs_builder *b, uint64_t dst_addr, uint64_t src_addr,
       const struct cs_index reg = cs_scratch_reg_tuple(b, 4, count);

       cs_load_to(b, reg, src_addr_reg, BITFIELD_MASK(count), offset);
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
       cs_store(b, reg, dst_addr_reg, BITFIELD_MASK(count), offset);

       copy_count -= count;
@@ -64,7 +64,7 @@ cmd_copy_data(struct cs_builder *b, uint64_t dst_addr, uint64_t src_addr,
       size -= offset;
    }

-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
 }
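cmd_copy_data() moves the payload in batches sized to the available scratch registers: load a batch, wait on LS, store it back out, advance. A sketch of the loop shape under stated assumptions (the batch cap of 12 registers and the offset bookkeeping are illustrative, not lifted from the patch; MIN2() is the usual Mesa helper):

    /* Sketch: word-aligned GPU-side memcpy staged through scratch registers. */
    uint32_t copy_count = size / sizeof(uint32_t);
    uint32_t offset = 0;
    while (copy_count > 0) {
       const unsigned count = MIN2(copy_count, 12);   /* assumed batch cap */
       const struct cs_index reg = cs_scratch_reg_tuple(b, 4, count);

       cs_load_to(b, reg, src_addr_reg, BITFIELD_MASK(count), offset);
       cs_wait_slot(b, SB_ID(LS));   /* loads landed in scratch */
       cs_store(b, reg, dst_addr_reg, BITFIELD_MASK(count), offset);

       copy_count -= count;
       offset += count * sizeof(uint32_t);
    }
    cs_wait_slot(b, SB_ID(LS));      /* flush the final stores */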
 static struct cs_builder *
@@ -170,7 +170,7 @@ panvk_per_arch(utrace_clone_finish_builder)(struct cs_builder *b)
    cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_NONE,
                    MALI_CS_OTHER_FLUSH_MODE_NONE, flush_id,
                    cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
-   cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
+   cs_wait_slot(b, SB_ID(IMM_FLUSH));

    cs_finish(b);
 }