Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2026-05-06 15:58:05 +02:00)
panfrost: Remove progress_increment from all CS builders
Progression logic is deprecated since v11 and we don't plan to use it.
Let's get rid of all increment logic on all instructions.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Acked-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34947>
This commit is contained in:
parent 5ba982f166
commit 53f780ec91
11 changed files with 164 additions and 182 deletions
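The change is mechanical at every call site: the trailing progress-increment flag disappears from the wait/run helpers. A minimal before/after sketch, distilled from the hunks below (builder setup omitted):

    /* Before: each helper took a progress-increment bool (always false). */
    cs_wait_slot(&b, 0, false);
    cs_run_fragment(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER, false);

    /* After: the parameter is gone, and the emitted instruction no longer
     * sets progress_increment. */
    cs_wait_slot(&b, 0);
    cs_run_fragment(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);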
@@ -148,7 +148,7 @@ csf_oom_handler_init(struct panfrost_context *ctx)
    /* Use different framebuffer descriptor depending on whether incremental
     * rendering has already been triggered */
    cs_load32_to(&b, counter, tiler_oom_ctx, FIELD_OFFSET(counter));
-   cs_wait_slot(&b, 0, false);
+   cs_wait_slot(&b, 0);
    cs_if(&b, MALI_CS_CONDITION_GREATER, counter) {
       cs_load64_to(&b, cs_sr_reg64(&b, FRAGMENT, FBD_POINTER), tiler_oom_ctx,
                    FBD_OFFSET(MIDDLE));
@@ -164,12 +164,12 @@ csf_oom_handler_init(struct panfrost_context *ctx)
                 FIELD_OFFSET(bbox_max));
    cs_move64_to(&b, cs_sr_reg64(&b, FRAGMENT, TEM_POINTER), 0);
    cs_move32_to(&b, cs_sr_reg32(&b, FRAGMENT, TEM_ROW_STRIDE), 0);
-   cs_wait_slot(&b, 0, false);
+   cs_wait_slot(&b, 0);

    /* Run the fragment job and wait */
    cs_select_sb_entries_for_async_ops(&b, 3);
-   cs_run_fragment(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER, false);
-   cs_wait_slot(&b, 3, false);
+   cs_run_fragment(&b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+   cs_wait_slot(&b, 3);

    /* Increment counter */
    cs_add32(&b, counter, counter, 1);
@@ -177,9 +177,9 @@ csf_oom_handler_init(struct panfrost_context *ctx)

    /* Load completed chunks */
    cs_load64_to(&b, tiler_ctx, tiler_oom_ctx, FIELD_OFFSET(tiler_desc));
-   cs_wait_slot(&b, 0, false);
+   cs_wait_slot(&b, 0);
    cs_load_to(&b, completed_chunks, tiler_ctx, BITFIELD_MASK(4), 10 * 4);
-   cs_wait_slot(&b, 0, false);
+   cs_wait_slot(&b, 0);

    cs_finish_fragment(&b, false, completed_top, completed_bottom, cs_now());

@@ -195,7 +195,7 @@ csf_oom_handler_init(struct panfrost_context *ctx)
                    MALI_CS_OTHER_FLUSH_MODE_INVALIDATE, flush_id,
                    cs_defer(0, 0));

-   cs_wait_slot(&b, 0, false);
+   cs_wait_slot(&b, 0);

    cs_select_sb_entries_for_async_ops(&b, 2);
 }
@@ -347,7 +347,7 @@ csf_emit_batch_end(struct panfrost_batch *batch)
    struct cs_builder *b = batch->csf.cs.builder;

    /* Barrier to let everything finish */
-   cs_wait_slots(b, BITFIELD_MASK(8), false);
+   cs_wait_slots(b, BITFIELD_MASK(8));

    if (dev->debug & PAN_DBG_SYNC) {
       /* Get the CS state */
@@ -367,7 +367,7 @@ csf_emit_batch_end(struct panfrost_batch *batch)
    cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_CLEAN,
                    MALI_CS_OTHER_FLUSH_MODE_INVALIDATE, flush_id,
                    cs_defer(0, 0));
-   cs_wait_slot(b, 0, false);
+   cs_wait_slot(b, 0);

    /* Finish the command stream */
    if (!cs_is_valid(batch->csf.cs.builder))
@@ -821,8 +821,8 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,

    if (batch->draw_count > 0) {
       /* Finish tiling and wait for IDVS and tiling */
-      cs_finish_tiling(b, false);
-      cs_wait_slot(b, 2, false);
+      cs_finish_tiling(b);
+      cs_wait_slot(b, 2);
       cs_vt_end(b, cs_now());
    }

@@ -841,7 +841,7 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,
    if (batch->draw_count > 0) {
       struct cs_index counter = cs_reg32(b, 78);
       cs_load32_to(b, counter, cs_reg64(b, TILER_OOM_CTX_REG), 0);
-      cs_wait_slot(b, 0, false);
+      cs_wait_slot(b, 0);
       cs_if(b, MALI_CS_CONDITION_GREATER, counter) {
          cs_move64_to(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
                       GET_FBD(oom_ctx, LAST).gpu);
@@ -849,8 +849,8 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,
    }

    /* Run the fragment job and wait */
-   cs_run_fragment(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER, false);
-   cs_wait_slot(b, 2, false);
+   cs_run_fragment(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER);
+   cs_wait_slot(b, 2);

    /* Gather freed heap chunks and add them to the heap context free list
     * so they can be re-used next time the tiler heap runs out of chunks.
@@ -862,7 +862,7 @@ GENX(csf_emit_fragment_job)(struct panfrost_batch *batch,
    cs_move64_to(b, cs_reg64(b, 90), batch->tiler_ctx.valhall.desc);
    cs_load_to(b, cs_reg_tuple(b, 86, 4), cs_reg64(b, 90), BITFIELD_MASK(4),
               40);
-   cs_wait_slot(b, 0, false);
+   cs_wait_slot(b, 0);
    cs_finish_fragment(b, true, cs_reg64(b, 86), cs_reg64(b, 88), cs_now());
    }
 }
@@ -950,7 +950,7 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,
    cs_load_to(b, grid_xyz, address, BITFIELD_MASK(3), 0);

    /* Wait for the load */
-   cs_wait_slot(b, 0, false);
+   cs_wait_slot(b, 0);

    /* Copy to FAU */
    for (unsigned i = 0; i < 3; ++i) {
@@ -962,7 +962,7 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,
    }

    /* Wait for the stores */
-   cs_wait_slot(b, 0, false);
+   cs_wait_slot(b, 0);

    /* Use run_compute with a set task axis instead of run_compute_indirect as
     * run_compute_indirect has been found to cause intermittent hangs. This
@@ -973,7 +973,7 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,
     * this is somewhat offset by run_compute being a native instruction. */
    unsigned task_axis = MALI_TASK_AXIS_X;
    cs_run_compute(b, DIV_ROUND_UP(max_thread_cnt, threads_per_wg), task_axis,
-                  false, cs_shader_res_sel(0, 0, 0, 0));
+                  cs_shader_res_sel(0, 0, 0, 0));
    } else {
       /* Set size in workgroups per dimension immediately */
       cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X), info->grid[0]);
@@ -1008,7 +1008,7 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,

    assert(task_axis <= MALI_TASK_AXIS_Z);
    assert(task_increment > 0);
-   cs_run_compute(b, task_increment, task_axis, false,
+   cs_run_compute(b, task_increment, task_axis,
                   cs_shader_res_sel(0, 0, 0, 0));
    }
 }
@@ -1050,10 +1050,10 @@ GENX(csf_launch_xfb)(struct panfrost_batch *batch,
    csf_emit_shader_regs(batch, PIPE_SHADER_VERTEX,
                         batch->rsd[PIPE_SHADER_VERTEX]);
    /* force a barrier to avoid read/write sync issues with buffers */
-   cs_wait_slot(b, 2, false);
+   cs_wait_slot(b, 2);

    /* XXX: Choose correctly */
-   cs_run_compute(b, 1, MALI_TASK_AXIS_Z, false, cs_shader_res_sel(0, 0, 0, 0));
+   cs_run_compute(b, 1, MALI_TASK_AXIS_Z, cs_shader_res_sel(0, 0, 0, 0));
 }

 static void
@@ -1364,10 +1364,10 @@ GENX(csf_launch_draw)(struct panfrost_batch *batch,
    }

 #if PAN_ARCH >= 12
-   cs_run_idvs2(b, flags_override, false, true, drawid,
+   cs_run_idvs2(b, flags_override, true, drawid,
                 MALI_IDVS_SHADING_MODE_EARLY);
 #else
-   cs_run_idvs(b, flags_override, false, true, cs_shader_res_sel(0, 0, 1, 0),
+   cs_run_idvs(b, flags_override, true, cs_shader_res_sel(0, 0, 1, 0),
                cs_shader_res_sel(2, 2, 2, 0), drawid);
 #endif
 }
@@ -1409,12 +1409,12 @@ GENX(csf_launch_draw_indirect)(struct panfrost_batch *batch,
       cs_move32_to(b, cs_sr_reg32(b, IDVS, INDEX_BUFFER_SIZE), 0);
    }

-   cs_wait_slot(b, 0, false);
+   cs_wait_slot(b, 0);
 #if PAN_ARCH >= 12
-   cs_run_idvs2(b, flags_override, false, true, drawid,
+   cs_run_idvs2(b, flags_override, true, drawid,
                 MALI_IDVS_SHADING_MODE_EARLY);
 #else
-   cs_run_idvs(b, flags_override, false, true, cs_shader_res_sel(0, 0, 1, 0),
+   cs_run_idvs(b, flags_override, true, cs_shader_res_sel(0, 0, 1, 0),
                cs_shader_res_sel(2, 2, 2, 0), drawid);
 #endif

@@ -386,7 +386,6 @@ GENX(panfrost_launch_precomp)(struct panfrost_batch *batch,

    assert(task_axis <= MALI_TASK_AXIS_Z);
    assert(task_increment > 0);
-   cs_run_compute(b, task_increment, task_axis, false,
-                  cs_shader_res_sel(0, 0, 0, 0));
+   cs_run_compute(b, task_increment, task_axis, cs_shader_res_sel(0, 0, 0, 0));
 #endif
 }
@@ -598,7 +598,7 @@ cs_flush_block_instrs(struct cs_builder *b)
                    sizeof(uint64_t));

          /* Drop the prev_load_ip_target value and replace it by the final
-          * IP. */
+           * IP. */
          *instr &= ~BITFIELD64_MASK(32);
          *instr |= ip;

@@ -1110,13 +1110,12 @@ cs_move64_to(struct cs_builder *b, struct cs_index dest, uint64_t imm)
 }

 static inline void
-cs_wait_slots(struct cs_builder *b, unsigned wait_mask, bool progress_inc)
+cs_wait_slots(struct cs_builder *b, unsigned wait_mask)
 {
    struct cs_load_store_tracker *ls_tracker = b->conf.ls_tracker;

    cs_emit(b, WAIT, I) {
       I.wait_mask = wait_mask;
-      I.progress_increment = progress_inc;
    }

    /* We don't do advanced tracking of cs_defer(), and assume that
@@ -1130,11 +1129,11 @@ cs_wait_slots(struct cs_builder *b, unsigned wait_mask, bool progress_inc)
 }

 static inline void
-cs_wait_slot(struct cs_builder *b, unsigned slot, bool progress_inc)
+cs_wait_slot(struct cs_builder *b, unsigned slot)
 {
    assert(slot < 8 && "invalid slot");

-   cs_wait_slots(b, BITFIELD_BIT(slot), progress_inc);
+   cs_wait_slots(b, BITFIELD_BIT(slot));
 }

 struct cs_shader_res_sel {
@@ -1154,13 +1153,11 @@ cs_shader_res_sel(unsigned srt, unsigned fau, unsigned spd, unsigned tsd)

 static inline void
 cs_run_compute(struct cs_builder *b, unsigned task_increment,
-               enum mali_task_axis task_axis, bool progress_inc,
-               struct cs_shader_res_sel res_sel)
+               enum mali_task_axis task_axis, struct cs_shader_res_sel res_sel)
 {
    cs_emit(b, RUN_COMPUTE, I) {
       I.task_increment = task_increment;
       I.task_axis = task_axis;
-      I.progress_increment = progress_inc;
       I.srt_select = res_sel.srt;
       I.spd_select = res_sel.spd;
       I.tsd_select = res_sel.tsd;
@@ -1170,12 +1167,11 @@ cs_run_compute(struct cs_builder *b, unsigned task_increment,

 #if PAN_ARCH == 10
 static inline void
-cs_run_tiling(struct cs_builder *b, uint32_t flags_override, bool progress_inc,
+cs_run_tiling(struct cs_builder *b, uint32_t flags_override,
               struct cs_shader_res_sel res_sel)
 {
    cs_emit(b, RUN_TILING, I) {
       I.flags_override = flags_override;
-      I.progress_increment = progress_inc;
       I.srt_select = res_sel.srt;
       I.spd_select = res_sel.spd;
       I.tsd_select = res_sel.tsd;
@@ -1186,13 +1182,12 @@ cs_run_tiling(struct cs_builder *b, uint32_t flags_override, bool progress_inc,

 #if PAN_ARCH >= 12
 static inline void
-cs_run_idvs2(struct cs_builder *b, uint32_t flags_override, bool progress_inc,
-             bool malloc_enable, struct cs_index draw_id,
+cs_run_idvs2(struct cs_builder *b, uint32_t flags_override, bool malloc_enable,
+             struct cs_index draw_id,
              enum mali_idvs_shading_mode vertex_shading_mode)
 {
    cs_emit(b, RUN_IDVS2, I) {
       I.flags_override = flags_override;
-      I.progress_increment = progress_inc;
       I.malloc_enable = malloc_enable;
       I.vertex_shading_mode = vertex_shading_mode;

@@ -1206,13 +1201,12 @@ cs_run_idvs2(struct cs_builder *b, uint32_t flags_override, bool progress_inc,
 }
 #else
 static inline void
-cs_run_idvs(struct cs_builder *b, uint32_t flags_override, bool progress_inc,
-            bool malloc_enable, struct cs_shader_res_sel varying_sel,
+cs_run_idvs(struct cs_builder *b, uint32_t flags_override, bool malloc_enable,
+            struct cs_shader_res_sel varying_sel,
             struct cs_shader_res_sel frag_sel, struct cs_index draw_id)
 {
    cs_emit(b, RUN_IDVS, I) {
       I.flags_override = flags_override;
-      I.progress_increment = progress_inc;
       I.malloc_enable = malloc_enable;

       if (draw_id.type == CS_INDEX_UNDEF) {
@@ -1242,31 +1236,29 @@ cs_run_idvs(struct cs_builder *b, uint32_t flags_override, bool progress_inc,

 static inline void
 cs_run_fragment(struct cs_builder *b, bool enable_tem,
-                enum mali_tile_render_order tile_order, bool progress_inc)
+                enum mali_tile_render_order tile_order)
 {
    cs_emit(b, RUN_FRAGMENT, I) {
       I.enable_tem = enable_tem;
       I.tile_order = tile_order;
-      I.progress_increment = progress_inc;
    }
 }

 static inline void
 cs_run_fullscreen(struct cs_builder *b, uint32_t flags_override,
-                  bool progress_inc, struct cs_index dcd)
+                  struct cs_index dcd)
 {
    cs_emit(b, RUN_FULLSCREEN, I) {
       I.flags_override = flags_override;
-      I.progress_increment = progress_inc;
       I.dcd = cs_src64(b, dcd);
    }
 }

 static inline void
-cs_finish_tiling(struct cs_builder *b, bool progress_inc)
+cs_finish_tiling(struct cs_builder *b)
 {
    cs_emit(b, FINISH_TILING, I)
-      I.progress_increment = progress_inc;
+      ;
 }

 static inline void
@@ -1596,11 +1588,10 @@ cs_progress_load(struct cs_builder *b, struct cs_index dst)

 static inline void
 cs_run_compute_indirect(struct cs_builder *b, unsigned wg_per_task,
-                        bool progress_inc, struct cs_shader_res_sel res_sel)
+                        struct cs_shader_res_sel res_sel)
 {
    cs_emit(b, RUN_COMPUTE_INDIRECT, I) {
       I.workgroups_per_task = wg_per_task;
-      I.progress_increment = progress_inc;
       I.srt_select = res_sel.srt;
       I.spd_select = res_sel.spd;
       I.tsd_select = res_sel.tsd;
@@ -1924,7 +1915,7 @@ cs_exception_handler_end(struct cs_builder *b,

       cs_load64_to(b, addr_reg, handler->ctx.ctx_reg,
                    handler->ctx.dump_addr_offset);
-      cs_wait_slot(b, handler->ctx.ls_sb_slot, false);
+      cs_wait_slot(b, handler->ctx.ls_sb_slot);

       for (unsigned i = 0; i < num_ranges; ++i) {
          unsigned reg_count = util_bitcount(masks[i]);
@@ -1933,7 +1924,7 @@ cs_exception_handler_end(struct cs_builder *b,
          offset += reg_count * 4;
       }

-      cs_wait_slot(b, handler->ctx.ls_sb_slot, false);
+      cs_wait_slot(b, handler->ctx.ls_sb_slot);
    }

    /* Now that the preamble is emitted, we can flush the instructions we have in
@@ -1946,7 +1937,7 @@ cs_exception_handler_end(struct cs_builder *b,

       cs_load64_to(b, addr_reg, handler->ctx.ctx_reg,
                    handler->ctx.dump_addr_offset);
-      cs_wait_slot(b, handler->ctx.ls_sb_slot, false);
+      cs_wait_slot(b, handler->ctx.ls_sb_slot);

       for (unsigned i = 0; i < num_ranges; ++i) {
          unsigned reg_count = util_bitcount(masks[i]);
@@ -1955,7 +1946,7 @@ cs_exception_handler_end(struct cs_builder *b,
          offset += reg_count * 4;
       }

-      cs_wait_slot(b, handler->ctx.ls_sb_slot, false);
+      cs_wait_slot(b, handler->ctx.ls_sb_slot);
    }

    /* Fill the rest of the buffer with NOPs. */
@@ -1992,10 +1983,10 @@ cs_trace_preamble(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
     * access. Use cs_trace_field_offset() to get an offset taking this
     * pre-increment into account. */
    cs_load64_to(b, tracebuf_addr, ctx->ctx_reg, ctx->tracebuf_addr_offset);
-   cs_wait_slot(b, ctx->ls_sb_slot, false);
+   cs_wait_slot(b, ctx->ls_sb_slot);
    cs_add64(b, tracebuf_addr, tracebuf_addr, trace_size);
    cs_store64(b, tracebuf_addr, ctx->ctx_reg, ctx->tracebuf_addr_offset);
-   cs_wait_slot(b, ctx->ls_sb_slot, false);
+   cs_wait_slot(b, ctx->ls_sb_slot);
 }

 #define cs_trace_field_offset(__type, __field) \
@@ -2010,10 +2001,10 @@ struct cs_run_fragment_trace {
 static inline void
 cs_trace_run_fragment(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
                       struct cs_index scratch_regs, bool enable_tem,
-                      enum mali_tile_render_order tile_order, bool progress_inc)
+                      enum mali_tile_render_order tile_order)
 {
    if (likely(!ctx->enabled)) {
-      cs_run_fragment(b, enable_tem, tile_order, progress_inc);
+      cs_run_fragment(b, enable_tem, tile_order);
       return;
    }

@@ -2026,12 +2017,12 @@ cs_trace_run_fragment(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
    /* cs_run_xx() must immediately follow cs_load_ip_to() otherwise the IP
     * won't point to the right instruction. */
    cs_load_ip_to(b, data);
-   cs_run_fragment(b, enable_tem, tile_order, progress_inc);
+   cs_run_fragment(b, enable_tem, tile_order);
    cs_store64(b, data, tracebuf_addr, cs_trace_field_offset(run_fragment, ip));

    cs_store(b, cs_reg_tuple(b, 40, 7), tracebuf_addr, BITFIELD_MASK(7),
             cs_trace_field_offset(run_fragment, sr));
-   cs_wait_slot(b, ctx->ls_sb_slot, false);
+   cs_wait_slot(b, ctx->ls_sb_slot);
 }

 #if PAN_ARCH >= 12
@@ -2045,12 +2036,11 @@ struct cs_run_idvs2_trace {
 static inline void
 cs_trace_run_idvs2(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
                    struct cs_index scratch_regs, uint32_t flags_override,
-                   bool progress_inc, bool malloc_enable,
-                   struct cs_index draw_id,
+                   bool malloc_enable, struct cs_index draw_id,
                    enum mali_idvs_shading_mode vertex_shading_mode)
 {
    if (likely(!ctx->enabled)) {
-      cs_run_idvs2(b, flags_override, progress_inc, malloc_enable, draw_id,
+      cs_run_idvs2(b, flags_override, malloc_enable, draw_id,
                    vertex_shading_mode);
       return;
    }
@@ -2063,8 +2053,7 @@ cs_trace_run_idvs2(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
    /* cs_run_xx() must immediately follow cs_load_ip_to() otherwise the IP
     * won't point to the right instruction. */
    cs_load_ip_to(b, data);
-   cs_run_idvs2(b, flags_override, progress_inc, malloc_enable, draw_id,
-                vertex_shading_mode);
+   cs_run_idvs2(b, flags_override, malloc_enable, draw_id, vertex_shading_mode);
    cs_store64(b, data, tracebuf_addr, cs_trace_field_offset(run_idvs2, ip));

    if (draw_id.type != CS_INDEX_UNDEF)
@@ -2076,7 +2065,7 @@ cs_trace_run_idvs2(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
                cs_trace_field_offset(run_idvs2, sr[i]));
    cs_store(b, cs_reg_tuple(b, 64, 2), tracebuf_addr, BITFIELD_MASK(2),
             cs_trace_field_offset(run_idvs2, sr[64]));
-   cs_wait_slot(b, ctx->ls_sb_slot, false);
+   cs_wait_slot(b, ctx->ls_sb_slot);
 }
 #else
 struct cs_run_idvs_trace {
@@ -2089,27 +2078,25 @@ struct cs_run_idvs_trace {
 static inline void
 cs_trace_run_idvs(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
                   struct cs_index scratch_regs, uint32_t flags_override,
-                  bool progress_inc, bool malloc_enable,
-                  struct cs_shader_res_sel varying_sel,
+                  bool malloc_enable, struct cs_shader_res_sel varying_sel,
                   struct cs_shader_res_sel frag_sel, struct cs_index draw_id)
 {
    if (likely(!ctx->enabled)) {
-      cs_run_idvs(b, flags_override, progress_inc, malloc_enable, varying_sel,
-                  frag_sel, draw_id);
+      cs_run_idvs(b, flags_override, malloc_enable, varying_sel, frag_sel,
+                  draw_id);
       return;
    }

    struct cs_index tracebuf_addr = cs_reg64(b, scratch_regs.reg);
    struct cs_index data = cs_reg64(b, scratch_regs.reg + 2);

-   cs_trace_preamble(b, ctx, scratch_regs,
-                     sizeof(struct cs_run_idvs_trace));
+   cs_trace_preamble(b, ctx, scratch_regs, sizeof(struct cs_run_idvs_trace));

    /* cs_run_xx() must immediately follow cs_load_ip_to() otherwise the IP
     * won't point to the right instruction. */
    cs_load_ip_to(b, data);
-   cs_run_idvs(b, flags_override, progress_inc, malloc_enable, varying_sel,
-               frag_sel, draw_id);
+   cs_run_idvs(b, flags_override, malloc_enable, varying_sel, frag_sel,
+               draw_id);
    cs_store64(b, data, tracebuf_addr, cs_trace_field_offset(run_idvs, ip));

    if (draw_id.type != CS_INDEX_UNDEF)
@@ -2121,7 +2108,7 @@ cs_trace_run_idvs(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
                cs_trace_field_offset(run_idvs, sr[i]));
    cs_store(b, cs_reg_tuple(b, 48, 13), tracebuf_addr, BITFIELD_MASK(13),
             cs_trace_field_offset(run_idvs, sr[48]));
-   cs_wait_slot(b, ctx->ls_sb_slot, false);
+   cs_wait_slot(b, ctx->ls_sb_slot);
 }
 #endif

@@ -2133,24 +2120,23 @@ struct cs_run_compute_trace {
 static inline void
 cs_trace_run_compute(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
                      struct cs_index scratch_regs, unsigned task_increment,
-                     enum mali_task_axis task_axis, bool progress_inc,
+                     enum mali_task_axis task_axis,
                      struct cs_shader_res_sel res_sel)
 {
    if (likely(!ctx->enabled)) {
-      cs_run_compute(b, task_increment, task_axis, progress_inc, res_sel);
+      cs_run_compute(b, task_increment, task_axis, res_sel);
       return;
    }

    struct cs_index tracebuf_addr = cs_reg64(b, scratch_regs.reg);
    struct cs_index data = cs_reg64(b, scratch_regs.reg + 2);

-   cs_trace_preamble(b, ctx, scratch_regs,
-                     sizeof(struct cs_run_compute_trace));
+   cs_trace_preamble(b, ctx, scratch_regs, sizeof(struct cs_run_compute_trace));

    /* cs_run_xx() must immediately follow cs_load_ip_to() otherwise the IP
     * won't point to the right instruction. */
    cs_load_ip_to(b, data);
-   cs_run_compute(b, task_increment, task_axis, progress_inc, res_sel);
+   cs_run_compute(b, task_increment, task_axis, res_sel);
    cs_store64(b, data, tracebuf_addr, cs_trace_field_offset(run_compute, ip));

    for (unsigned i = 0; i < 32; i += 16)
@@ -2158,31 +2144,30 @@ cs_trace_run_compute(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
                cs_trace_field_offset(run_compute, sr[i]));
    cs_store(b, cs_reg_tuple(b, 32, 8), tracebuf_addr, BITFIELD_MASK(8),
             cs_trace_field_offset(run_compute, sr[32]));
-   cs_wait_slot(b, ctx->ls_sb_slot, false);
+   cs_wait_slot(b, ctx->ls_sb_slot);
 }

 static inline void
 cs_trace_run_compute_indirect(struct cs_builder *b,
                               const struct cs_tracing_ctx *ctx,
                               struct cs_index scratch_regs,
-                              unsigned wg_per_task, bool progress_inc,
+                              unsigned wg_per_task,
                               struct cs_shader_res_sel res_sel)
 {
    if (likely(!ctx->enabled)) {
-      cs_run_compute_indirect(b, wg_per_task, progress_inc, res_sel);
+      cs_run_compute_indirect(b, wg_per_task, res_sel);
       return;
    }

    struct cs_index tracebuf_addr = cs_reg64(b, scratch_regs.reg);
    struct cs_index data = cs_reg64(b, scratch_regs.reg + 2);

-   cs_trace_preamble(b, ctx, scratch_regs,
-                     sizeof(struct cs_run_compute_trace));
+   cs_trace_preamble(b, ctx, scratch_regs, sizeof(struct cs_run_compute_trace));

    /* cs_run_xx() must immediately follow cs_load_ip_to() otherwise the IP
     * won't point to the right instruction. */
    cs_load_ip_to(b, data);
-   cs_run_compute_indirect(b, wg_per_task, progress_inc, res_sel);
+   cs_run_compute_indirect(b, wg_per_task, res_sel);
    cs_store64(b, data, tracebuf_addr, cs_trace_field_offset(run_compute, ip));

    for (unsigned i = 0; i < 32; i += 16)
@@ -2190,5 +2175,5 @@ cs_trace_run_compute_indirect(struct cs_builder *b,
                cs_trace_field_offset(run_compute, sr[i]));
    cs_store(b, cs_reg_tuple(b, 32, 8), tracebuf_addr, BITFIELD_MASK(8),
             cs_trace_field_offset(run_compute, sr[32]));
-   cs_wait_slot(b, ctx->ls_sb_slot, false);
+   cs_wait_slot(b, ctx->ls_sb_slot);
 }
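On the builder side the pattern is the same for every RUN_*/WAIT helper in cs_builder.h: drop the progress_inc parameter and stop setting the progress_increment field of the packed instruction. For reference, the post-change cs_run_fragment() reassembled from the hunk above (indentation approximate):

    static inline void
    cs_run_fragment(struct cs_builder *b, bool enable_tem,
                    enum mali_tile_render_order tile_order)
    {
       /* Pack a RUN_FRAGMENT instruction; no progress_increment anymore. */
       cs_emit(b, RUN_FRAGMENT, I) {
          I.enable_tem = enable_tem;
          I.tile_order = tile_order;
       }
    }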
@@ -128,11 +128,11 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
    struct cs_index flush_id = cs_scratch_reg32(b, 0);

    cs_move32_to(b, flush_id, 0);
-   cs_wait_slots(b, SB_ALL_MASK, false);
+   cs_wait_slots(b, SB_ALL_MASK);
    cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_CLEAN,
                    MALI_CS_OTHER_FLUSH_MODE_NONE, flush_id,
                    cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
-   cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
+   cs_wait_slot(b, SB_ID(IMM_FLUSH));

    /* If we're in sync/trace more, we signal the debug object. */
    if (instance->debug_flags & (PANVK_DEBUG_SYNC | PANVK_DEBUG_TRACE)) {
@@ -144,12 +144,12 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
       cs_move32_to(b, one, 1);
       cs_load64_to(b, debug_sync_addr, cs_subqueue_ctx_reg(b),
                    offsetof(struct panvk_cs_subqueue_context, debug.syncobjs));
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
       cs_add64(b, debug_sync_addr, debug_sync_addr,
               sizeof(struct panvk_cs_sync32) * subqueue);
       cs_load32_to(b, error, debug_sync_addr,
                    offsetof(struct panvk_cs_sync32, error));
-      cs_wait_slots(b, SB_ALL_MASK, false);
+      cs_wait_slots(b, SB_ALL_MASK);
       if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
          cs_sync32_add(b, true, MALI_CS_SYNC_SCOPE_CSG, one,
                        debug_sync_addr, cs_now());
@@ -162,7 +162,7 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
          /* Overwrite the sync error with the first error we encountered. */
          cs_store32(b, error, debug_sync_addr,
                     offsetof(struct panvk_cs_sync32, error));
-         cs_wait_slot(b, SB_ID(LS), false);
+         cs_wait_slot(b, SB_ID(LS));
       }
    }
 }
@@ -566,7 +566,7 @@ panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
       struct panvk_cs_state *cs_state = &cmdbuf->state.cs[i];

       if (deps.src[i].wait_sb_mask)
-         cs_wait_slots(b, deps.src[i].wait_sb_mask, false);
+         cs_wait_slots(b, deps.src[i].wait_sb_mask);

       struct panvk_cache_flush_info cache_flush = deps.src[i].cache_flush;
       if (cache_flush.l2 != MALI_CS_FLUSH_MODE_NONE ||
@@ -577,7 +577,7 @@ panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
          cs_move32_to(b, flush_id, 0);
          cs_flush_caches(b, cache_flush.l2, cache_flush.lsc, cache_flush.others,
                          flush_id, cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
-         cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
+         cs_wait_slot(b, SB_ID(IMM_FLUSH));
       }

       /* If no one waits on us, there's no point signaling the sync object. */
@@ -589,7 +589,7 @@ panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,

          cs_load64_to(b, sync_addr, cs_subqueue_ctx_reg(b),
                       offsetof(struct panvk_cs_subqueue_context, syncobjs));
-         cs_wait_slot(b, SB_ID(LS), false);
+         cs_wait_slot(b, SB_ID(LS));
          cs_add64(b, sync_addr, sync_addr, sizeof(struct panvk_cs_sync64) * i);
          cs_move64_to(b, add_val, 1);
          cs_sync64_add(b, false, MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr,
@@ -607,7 +607,7 @@ panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,

          cs_load64_to(b, sync_addr, cs_subqueue_ctx_reg(b),
                       offsetof(struct panvk_cs_subqueue_context, syncobjs));
-         cs_wait_slot(b, SB_ID(LS), false);
+         cs_wait_slot(b, SB_ID(LS));
          cs_add64(b, sync_addr, sync_addr, sizeof(struct panvk_cs_sync64) * j);

          cs_add64(b, wait_val, cs_progress_seqno_reg(b, j),
@@ -628,12 +628,12 @@ panvk_per_arch(cs_pick_iter_sb)(struct panvk_cmd_buffer *cmdbuf,

    cs_load32_to(b, iter_sb, cs_subqueue_ctx_reg(b),
                 offsetof(struct panvk_cs_subqueue_context, iter_sb));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    cs_match(b, iter_sb, cmp_scratch) {
 #define CASE(x) \
    cs_case(b, x) { \
-      cs_wait_slot(b, SB_ITER(x), false); \
+      cs_wait_slot(b, SB_ITER(x)); \
       cs_select_sb_entries_for_async_ops(b, SB_ITER(x)); \
    }

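Most of the panvk hunks repeat one scoreboard idiom: CS loads complete asynchronously, so the stream must wait on the load/store (LS) scoreboard slot before consuming the loaded registers. With the two-argument cs_wait_slot() the idiom now reads (a sketch; register names as used in the surrounding hunks):

    /* Load the syncobj pointer from the subqueue context... */
    cs_load64_to(b, sync_addr, cs_subqueue_ctx_reg(b),
                 offsetof(struct panvk_cs_subqueue_context, syncobjs));
    /* ...the load is async, so block on the LS slot before dereferencing. */
    cs_wait_slot(b, SB_ID(LS));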
@@ -227,10 +227,10 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
    if (shader->info.tls_size) {
       cs_move64_to(b, cs_scratch_reg64(b, 0), cmdbuf->state.tls.desc.gpu);
       cs_load64_to(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
       cs_move64_to(b, cs_scratch_reg64(b, 0), tsd);
       cs_store64(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
    }

    cs_update_compute_ctx(b) {
@@ -279,7 +279,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
                  cs_scratch_reg64(b, 0), BITFIELD_MASK(3), 0);
       cs_move64_to(b, cs_scratch_reg64(b, 0),
                    cmdbuf->state.compute.push_uniforms);
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));

       if (shader_uses_sysval(shader, compute, num_work_groups.x)) {
          cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X),
@@ -302,7 +302,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
                     shader, sysval_offset(compute, num_work_groups.z)));
       }

-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
    } else {
       cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X),
                    info->direct.wg_count.x);
@@ -326,7 +326,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
       * this is somewhat offset by run_compute being a native instruction. */
      unsigned task_axis = MALI_TASK_AXIS_X;
      cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                          wg_per_task, task_axis, false,
+                          wg_per_task, task_axis,
                           cs_shader_res_sel(0, 0, 0, 0));
   } else {
      unsigned task_axis = MALI_TASK_AXIS_X;
@@ -334,7 +334,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
      panvk_per_arch(calculate_task_axis_and_increment)(
         shader, phys_dev, &task_axis, &task_increment);
      cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                          task_increment, task_axis, false,
+                          task_increment, task_axis,
                           cs_shader_res_sel(0, 0, 0, 0));
   }
   cs_req_res(b, 0);
@@ -347,7 +347,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
   cs_load_to(b, cs_scratch_reg_tuple(b, 0, 3), cs_subqueue_ctx_reg(b),
              BITFIELD_MASK(3),
              offsetof(struct panvk_cs_subqueue_context, syncobjs));
-  cs_wait_slot(b, SB_ID(LS), false);
+  cs_wait_slot(b, SB_ID(LS));

   cs_add64(b, sync_addr, sync_addr,
            PANVK_SUBQUEUE_COMPUTE * sizeof(struct panvk_cs_sync64));
@@ -371,7 +371,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)

   cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b),
              offsetof(struct panvk_cs_subqueue_context, iter_sb));
-  cs_wait_slot(b, SB_ID(LS), false);
+  cs_wait_slot(b, SB_ID(LS));

   ++cmdbuf->state.cs[PANVK_SUBQUEUE_COMPUTE].relative_sync_point;
   clear_dirty_after_dispatch(cmdbuf);
@@ -769,7 +769,7 @@ cs_render_desc_ringbuf_reserve(struct cs_builder *b, uint32_t size)
    cs_load64_to(
       b, ringbuf_sync, cs_subqueue_ctx_reg(b),
       offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.syncobj));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    /* Wait for the other end to release memory. */
    cs_move32_to(b, sz_reg, size - 1);
@@ -793,7 +793,7 @@ cs_render_desc_ringbuf_move_ptr(struct cs_builder *b, uint32_t size,
       b, cs_scratch_reg_tuple(b, 2, 3), cs_subqueue_ctx_reg(b),
       BITFIELD_MASK(3),
       offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.ptr));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    /* Update the relative position and absolute address. */
    cs_add32(b, ptr_lo, ptr_lo, size);
@@ -813,7 +813,7 @@ cs_render_desc_ringbuf_move_ptr(struct cs_builder *b, uint32_t size,
       b, cs_scratch_reg_tuple(b, 2, 3), cs_subqueue_ctx_reg(b),
       BITFIELD_MASK(3),
       offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.ptr));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
 }

 static VkResult
@@ -927,7 +927,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
    cs_move64_to(b, cs_scratch_reg64(b, 12), 0);
    cs_move64_to(b, cs_scratch_reg64(b, 14), 0);

-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    /* Take care of the tiler desc with layer_offset=0 outside of the loop. */
    cs_move32_to(b, cs_scratch_reg32(b, 4),
@@ -942,7 +942,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
    cs_store(b, cs_scratch_reg_tuple(b, 0, 16), tiler_ctx_addr,
             BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 96);

-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    uint32_t remaining_layers =
       td_count > 1
@@ -970,7 +970,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
                BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 64);
       cs_store(b, cs_scratch_reg_tuple(b, 0, 16), tiler_ctx_addr,
                BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 96);
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));

       cs_update_vt_ctx(b)
          cs_add64(b, tiler_ctx_addr, tiler_ctx_addr,
@@ -1006,7 +1006,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
       cs_store(b, cs_scratch_reg_tuple(b, 0, 16), tiler_ctx_addr,
                BITFIELD_RANGE(0, 2) | BITFIELD_RANGE(10, 6), 96);

-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));

       cs_add32(b, cs_scratch_reg32(b, 4), cs_scratch_reg32(b, 4),
                MAX_LAYERS_PER_TILER_DESC << 8);
@@ -1230,7 +1230,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
       cs_load64_to(b, cur_tiler, cs_subqueue_ctx_reg(b),
                    offsetof(struct panvk_cs_subqueue_context,
                             render.desc_ringbuf.ptr));
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
       cs_add64(b, dst_fbd_ptr, cur_tiler,
                pan_size(TILER_CONTEXT) * td_count);
    }
@@ -1258,10 +1258,10 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
             cs_load_to(b, cs_scratch_reg_tuple(b, 0, 16),
                        pass_src_fbd_ptr, BITFIELD_MASK(16), fbd_off);
          }
-         cs_wait_slot(b, SB_ID(LS), false);
+         cs_wait_slot(b, SB_ID(LS));
          cs_store(b, cs_scratch_reg_tuple(b, 0, 16), pass_dst_fbd_ptr,
                   BITFIELD_MASK(16), fbd_off);
-         cs_wait_slot(b, SB_ID(LS), false);
+         cs_wait_slot(b, SB_ID(LS));
       }
       cs_add64(b, pass_src_fbd_ptr, pass_src_fbd_ptr, fbd_ir_pass_offset);
       cs_add64(b, pass_dst_fbd_ptr, pass_dst_fbd_ptr, fbd_ir_pass_offset);
@@ -1608,15 +1608,15 @@ wrap_prev_oq(struct panvk_cmd_buffer *cmdbuf)
       cs_load64_to(
          b, prev_oq_node_reg, cs_subqueue_ctx_reg(b),
          offsetof(struct panvk_cs_subqueue_context, render.oq_chain));
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
       cs_store64(b, prev_oq_node_reg, oq_node_reg,
                  offsetof(struct panvk_cs_occlusion_query, next));
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
    }

    cs_store64(b, oq_node_reg, cs_subqueue_ctx_reg(b),
               offsetof(struct panvk_cs_subqueue_context, render.oq_chain));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
    return VK_SUCCESS;
 }

@@ -2084,11 +2084,11 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
       cs_while(b, MALI_CS_CONDITION_GREATER, counter_reg) {
 #if PAN_ARCH >= 12
          cs_trace_run_idvs2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                            flags_override.opaque[0], false, true, cs_undef(),
+                            flags_override.opaque[0], true, cs_undef(),
                             MALI_IDVS_SHADING_MODE_EARLY);
 #else
          cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                           flags_override.opaque[0], false, true,
+                           flags_override.opaque[0], true,
                            cs_shader_res_sel(0, 0, 1, 0),
                            cs_shader_res_sel(2, 2, 2, 0), cs_undef());
 #endif
@@ -2107,11 +2107,11 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
    } else {
 #if PAN_ARCH >= 12
       cs_trace_run_idvs2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                         flags_override.opaque[0], false, true, cs_undef(),
-                         MALI_IDVS_SHADING_MODE_EARLY);
+                         flags_override.opaque[0], true, cs_undef(),
+                         MALI_IDVS_SHADING_MODE_EARLY);
 #else
       cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                        flags_override.opaque[0], false, true,
+                        flags_override.opaque[0], true,
                         cs_shader_res_sel(0, 0, 1, 0),
                         cs_shader_res_sel(2, 2, 2, 0), cs_undef());
 #endif
@@ -2264,7 +2264,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
       cs_load32_to(b, draw_count, draw_params_addr, 0);

       /* wait for draw_count to load from buffer */
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
       cs_umin32(b, draw_count, draw_count, max_draw_count);
    } else {
       cs_move32_to(b, draw_count, draw->indirect.draw_count);
@@ -2288,7 +2288,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
    }

    /* Wait for the SR33-37 indirect buffer load. */
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    if (patch_faus) {
       if (shader_uses_sysval(vs, graphics, vs.first_vertex)) {
@@ -2305,7 +2305,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,

       /* Wait for the store using SR-37 as src to finish, so we can
        * overwrite it. */
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
    }

    if (patch_attribs != 0) {
@@ -2322,7 +2322,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,

          cs_load32_to(b, attrib_offset, vs_drv_set,
                       pan_size(ATTRIBUTE) * i + (2 * sizeof(uint32_t)));
-         cs_wait_slot(b, SB_ID(LS), false);
+         cs_wait_slot(b, SB_ID(LS));

          /* Emulated immediate multiply: we walk the bits in
           * base_instance, and accumulate (stride << bit_pos) if the bit
@@ -2353,7 +2353,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,

          cs_store32(b, attrib_offset, vs_drv_set,
                     pan_size(ATTRIBUTE) * i + (2 * sizeof(uint32_t)));
-         cs_wait_slot(b, SB_ID(LS), false);
+         cs_wait_slot(b, SB_ID(LS));
       }
    }
 }
@@ -2368,13 +2368,12 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,

 #if PAN_ARCH >= 12
    cs_trace_run_idvs2(b, tracing_ctx, tracing_scratch_regs,
-                      flags_override.opaque[0], false, true, draw_id,
-                      MALI_IDVS_SHADING_MODE_EARLY);
+                      flags_override.opaque[0], true, draw_id,
+                      MALI_IDVS_SHADING_MODE_EARLY);
 #else
-   cs_trace_run_idvs(b, tracing_ctx, tracing_scratch_regs,
-                     flags_override.opaque[0], false, true,
-                     cs_shader_res_sel(0, 0, 1, 0),
-                     cs_shader_res_sel(2, 2, 2, 0), draw_id);
+   cs_trace_run_idvs(
+      b, tracing_ctx, tracing_scratch_regs, flags_override.opaque[0], true,
+      cs_shader_res_sel(0, 0, 1, 0), cs_shader_res_sel(2, 2, 2, 0), draw_id);
 #endif

    cs_add32(b, draw_count, draw_count, -1);
@@ -2627,7 +2626,7 @@ flush_tiling(struct panvk_cmd_buffer *cmdbuf)
    if (cmdbuf->state.gfx.render.tiler || inherits_render_ctx(cmdbuf)) {
       /* Flush the tiling operations and signal the internal sync object. */
       cs_req_res(b, CS_TILER_RES);
-      cs_finish_tiling(b, false);
+      cs_finish_tiling(b);
       cs_req_res(b, 0);

       struct cs_index sync_addr = cs_scratch_reg64(b, 0);
@@ -2638,7 +2637,7 @@ flush_tiling(struct panvk_cmd_buffer *cmdbuf)
       cs_load_to(b, cs_scratch_reg_tuple(b, 0, 3), cs_subqueue_ctx_reg(b),
                  BITFIELD_MASK(3),
                  offsetof(struct panvk_cs_subqueue_context, syncobjs));
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));

       /* We're relying on PANVK_SUBQUEUE_VERTEX_TILER being the first queue to
        * skip an ADD operation on the syncobjs pointer. */
@@ -2669,14 +2668,14 @@ flush_tiling(struct panvk_cmd_buffer *cmdbuf)

       cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b),
                  offsetof(struct panvk_cs_subqueue_context, iter_sb));
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));

       /* Update the vertex seqno. */
       ++cmdbuf->state.cs[PANVK_SUBQUEUE_VERTEX_TILER].relative_sync_point;
    } else {
       cs_load64_to(b, render_ctx, cs_subqueue_ctx_reg(b),
                    offsetof(struct panvk_cs_subqueue_context, render));
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
    }
 }

@@ -2691,7 +2690,7 @@ wait_finish_tiling(struct panvk_cmd_buffer *cmdbuf)

    cs_load64_to(b, vt_sync_addr, cs_subqueue_ctx_reg(b),
                 offsetof(struct panvk_cs_subqueue_context, syncobjs));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    cs_add64(b, vt_sync_point,
             cs_progress_seqno_reg(b, PANVK_SUBQUEUE_VERTEX_TILER),
@@ -2750,7 +2749,7 @@ setup_tiler_oom_ctx(struct panvk_cmd_buffer *cmdbuf)
    cs_store32(b, layer_count, cs_subqueue_ctx_reg(b),
               TILER_OOM_CTX_FIELD_OFFSET(layer_count));

-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
 }

 static VkResult
@@ -2837,7 +2836,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
    cs_load32_to(
       b, counter, cs_subqueue_ctx_reg(b),
       offsetof(struct panvk_cs_subqueue_context, tiler_oom_ctx.counter));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
    cs_if(b, MALI_CS_CONDITION_GREATER, counter)
       cs_update_frag_ctx(b)
          cs_add64(b, cs_sr_reg64(b, FRAGMENT, FBD_POINTER),
@@ -2854,7 +2853,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
       cs_flush_caches(b, MALI_CS_FLUSH_MODE_NONE, MALI_CS_FLUSH_MODE_NONE,
                       MALI_CS_OTHER_FLUSH_MODE_INVALIDATE, length_reg,
                       cs_defer(0x0, SB_ID(IMM_FLUSH)));
-      cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
+      cs_wait_slot(b, SB_ID(IMM_FLUSH));
    }

    cs_req_res(b, CS_FRAG_RES);
@@ -2864,7 +2863,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
       cs_move32_to(b, layer_count, calc_enabled_layer_count(cmdbuf));
       cs_while(b, MALI_CS_CONDITION_GREATER, layer_count) {
          cs_trace_run_fragment(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                               false, MALI_TILE_RENDER_ORDER_Z_ORDER, false);
+                               false, MALI_TILE_RENDER_ORDER_Z_ORDER);

          cs_add32(b, layer_count, layer_count, -1);
          cs_update_frag_ctx(b)
@@ -2873,7 +2872,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
       }
    } else {
       cs_trace_run_fragment(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                            false, MALI_TILE_RENDER_ORDER_Z_ORDER, false);
+                            false, MALI_TILE_RENDER_ORDER_Z_ORDER);
    }
    cs_req_res(b, 0);

@@ -2907,7 +2906,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
                           render.desc_ringbuf.syncobj));
    }

-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    cs_add64(b, sync_addr, sync_addr,
             PANVK_SUBQUEUE_FRAGMENT * sizeof(struct panvk_cs_sync64));
@@ -2920,12 +2919,12 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
       cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC)); \
    if (td_count == 1) { \
       cs_load_to(b, completed, cur_tiler, BITFIELD_MASK(4), 40); \
-      cs_wait_slot(b, SB_ID(LS), false); \
+      cs_wait_slot(b, SB_ID(LS)); \
       cs_finish_fragment(b, true, completed_top, completed_bottom, async); \
    } else if (td_count > 1) { \
       cs_while(b, MALI_CS_CONDITION_GREATER, tiler_count) { \
          cs_load_to(b, completed, cur_tiler, BITFIELD_MASK(4), 40); \
-         cs_wait_slot(b, SB_ID(LS), false); \
+         cs_wait_slot(b, SB_ID(LS)); \
          cs_finish_fragment(b, false, completed_top, completed_bottom, \
                             async); \
          cs_update_frag_ctx(b) \
@@ -2948,20 +2947,20 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
       cs_load64_to( \
          b, oq_chain, cs_subqueue_ctx_reg(b), \
          offsetof(struct panvk_cs_subqueue_context, render.oq_chain)); \
-      cs_wait_slot(b, SB_ID(LS), false); \
+      cs_wait_slot(b, SB_ID(LS)); \
       /* We use oq_syncobj as a placeholder to reset the oq_chain. */ \
       cs_move64_to(b, oq_syncobj, 0); \
       cs_store64( \
          b, oq_syncobj, cs_subqueue_ctx_reg(b), \
          offsetof(struct panvk_cs_subqueue_context, render.oq_chain)); \
-      cs_wait_slot(b, SB_ID(LS), false); \
+      cs_wait_slot(b, SB_ID(LS)); \
       cs_while(b, MALI_CS_CONDITION_ALWAYS, cs_undef()) { \
          cs_load64_to(b, oq_syncobj, oq_chain, \
                       offsetof(struct panvk_cs_occlusion_query, syncobj)); \
-         cs_wait_slot(b, SB_ID(LS), false); \
+         cs_wait_slot(b, SB_ID(LS)); \
         cs_load64_to(b, oq_chain, oq_chain, \
                      offsetof(struct panvk_cs_occlusion_query, next)); \
-         cs_wait_slot(b, SB_ID(LS), false); \
+         cs_wait_slot(b, SB_ID(LS)); \
         cs_sync32_set( \
            b, true, MALI_CS_SYNC_SCOPE_CSG, add_val_lo, oq_syncobj, \
            cs_defer(SB_MASK(DEFERRED_FLUSH), SB_ID(DEFERRED_SYNC))); \
@@ -2987,7 +2986,7 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)

    cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b),
               offsetof(struct panvk_cs_subqueue_context, iter_sb));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    /* Update the ring buffer position. */
    if (free_render_descs) {
@@ -41,7 +41,7 @@ panvk_per_arch(CmdResetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event,
                      (i * sizeof(struct panvk_cs_sync32)));
       cs_load32_to(b, seqno, sync_addr,
                    offsetof(struct panvk_cs_sync32, seqno));
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));

       cs_match(b, seqno, cmp_scratch) {
          cs_case(b, 0) {
@@ -83,7 +83,7 @@ panvk_per_arch(CmdSetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event,
                      (i * sizeof(struct panvk_cs_sync32)));
       cs_load32_to(b, seqno, sync_addr,
                    offsetof(struct panvk_cs_sync32, seqno));
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));

       cs_match(b, seqno, cmp_scratch) {
          cs_case(b, 0) {
@@ -95,10 +95,10 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
    if (shader->info.tls_size) {
       cs_move64_to(b, cs_scratch_reg64(b, 0), cmdbuf->state.tls.desc.gpu);
       cs_load64_to(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
       cs_move64_to(b, cs_scratch_reg64(b, 0), tsd);
       cs_store64(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
    }

    cs_update_compute_ctx(b) {
@@ -146,7 +146,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
    panvk_per_arch(calculate_task_axis_and_increment)(
       shader, phys_dev, &task_axis, &task_increment);
    cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
-                        task_increment, task_axis, false,
+                        task_increment, task_axis,
                         cs_shader_res_sel(0, 0, 0, 0));
    cs_req_res(b, 0);

@@ -158,7 +158,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
    cs_load_to(b, cs_scratch_reg_tuple(b, 0, 3), cs_subqueue_ctx_reg(b),
               BITFIELD_MASK(3),
               offsetof(struct panvk_cs_subqueue_context, syncobjs));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    cs_add64(b, sync_addr, sync_addr,
             PANVK_SUBQUEUE_COMPUTE * sizeof(struct panvk_cs_sync64));
@@ -182,7 +182,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,

    cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b),
               offsetof(struct panvk_cs_subqueue_context, iter_sb));
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    ++cmdbuf->state.cs[PANVK_SUBQUEUE_COMPUTE].relative_sync_point;

@@ -93,7 +93,7 @@ panvk_cmd_reset_occlusion_queries(struct panvk_cmd_buffer *cmd,
    /* Wait on deferred sync to ensure all prior query operations have
     * completed
     */
-   cs_wait_slot(b, SB_ID(DEFERRED_SYNC), false);
+   cs_wait_slot(b, SB_ID(DEFERRED_SYNC));

    struct cs_index addr = cs_scratch_reg64(b, 16);
    struct cs_index zero_regs = cs_scratch_reg_tuple(b, 0, 16);
@@ -113,7 +113,7 @@ panvk_cmd_reset_occlusion_queries(struct panvk_cmd_buffer *cmd,

    /* reset_oq_batch() only does the stores, we need to flush those explicitly
     * here. */
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    /* We flush the caches to make the new value visible to the CPU. */
    struct cs_index flush_id = cs_scratch_reg32(b, 0);
@@ -121,7 +121,7 @@ panvk_cmd_reset_occlusion_queries(struct panvk_cmd_buffer *cmd,
    cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_CLEAN,
                    MALI_CS_OTHER_FLUSH_MODE_NONE, flush_id,
                    cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
-   cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
+   cs_wait_slot(b, SB_ID(IMM_FLUSH));
 }

 static void
@@ -152,7 +152,7 @@ panvk_cmd_begin_occlusion_query(struct panvk_cmd_buffer *cmd,
    cs_move64_to(b, report_addr_gpu, report_addr);
    cs_move64_to(b, clear_value, 0);
    cs_store64(b, clear_value, report_addr_gpu, 0);
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
 }

 static void
@@ -219,7 +219,7 @@ copy_oq_result_batch(struct cs_builder *b,
    }

    /* Flush the loads. */
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));

    for (uint32_t i = 0; i < query_count; i++) {
       struct cs_index store_src =
@@ -230,7 +230,7 @@ copy_oq_result_batch(struct cs_builder *b,
    }

    /* Flush the stores. */
-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
 }

 static void
@@ -245,7 +245,7 @@ panvk_copy_occlusion_query_results(struct panvk_cmd_buffer *cmd,

    /* Wait for occlusion query syncobjs to be signalled. */
    if (flags & VK_QUERY_RESULT_WAIT_BIT)
-      cs_wait_slot(b, SB_ID(DEFERRED_SYNC), false);
+      cs_wait_slot(b, SB_ID(DEFERRED_SYNC));

    uint32_t res_size = (flags & VK_QUERY_RESULT_64_BIT) ? 2 : 1;
    uint32_t regs_per_copy =
@@ -76,7 +76,7 @@ generate_tiler_oom_handler(struct panvk_device *dev,
     * rendering has already been triggered */
    cs_load32_to(&b, counter, subqueue_ctx,
                 TILER_OOM_CTX_FIELD_OFFSET(counter));
-   cs_wait_slot(&b, SB_ID(LS), false);
+   cs_wait_slot(&b, SB_ID(LS));

    cs_if(&b, MALI_CS_CONDITION_GREATER, counter)
       cs_load64_to(&b, fbd_ptr, subqueue_ctx,
@@ -87,19 +87,18 @@ generate_tiler_oom_handler(struct panvk_device *dev,

    cs_load32_to(&b, layer_count, subqueue_ctx,
                 TILER_OOM_CTX_FIELD_OFFSET(layer_count));
-   cs_wait_slot(&b, SB_ID(LS), false);
+   cs_wait_slot(&b, SB_ID(LS));

    cs_req_res(&b, CS_FRAG_RES);
    cs_while(&b, MALI_CS_CONDITION_GREATER, layer_count) {
-      cs_trace_run_fragment(&b, &tracing_ctx,
-                            cs_scratch_reg_tuple(&b, 8, 4), false,
-                            MALI_TILE_RENDER_ORDER_Z_ORDER, false);
+      cs_trace_run_fragment(&b, &tracing_ctx, cs_scratch_reg_tuple(&b, 8, 4),
+                            false, MALI_TILE_RENDER_ORDER_Z_ORDER);
       cs_add32(&b, layer_count, layer_count, -1);
       cs_add64(&b, fbd_ptr, fbd_ptr, fbd_size);
    }
    cs_req_res(&b, 0);
    /* Wait for all iter scoreboards for simplicity. */
-   cs_wait_slots(&b, SB_ALL_ITERS_MASK, false);
+   cs_wait_slots(&b, SB_ALL_ITERS_MASK);

    /* Increment counter */
    cs_add32(&b, counter, counter, 1);
@@ -111,12 +110,12 @@ generate_tiler_oom_handler(struct panvk_device *dev,
    cs_load32_to(&b, td_count, subqueue_ctx,
                 TILER_OOM_CTX_FIELD_OFFSET(td_count));
    cs_move64_to(&b, zero, 0);
-   cs_wait_slot(&b, SB_ID(LS), false);
+   cs_wait_slot(&b, SB_ID(LS));

    cs_while(&b, MALI_CS_CONDITION_GREATER, td_count) {
       /* Load completed chunks */
       cs_load_to(&b, completed_chunks, tiler_ptr, BITFIELD_MASK(4), 10 * 4);
-      cs_wait_slot(&b, SB_ID(LS), false);
+      cs_wait_slot(&b, SB_ID(LS));

       cs_finish_fragment(&b, false, completed_top, completed_bottom,
                          cs_now());
@@ -136,7 +135,7 @@ generate_tiler_oom_handler(struct panvk_device *dev,
                    MALI_CS_OTHER_FLUSH_MODE_INVALIDATE, flush_id,
                    cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));

-   cs_wait_slot(&b, SB_ID(IMM_FLUSH), false);
+   cs_wait_slot(&b, SB_ID(IMM_FLUSH));
    }

    assert(cs_is_valid(&b));
@@ -31,7 +31,7 @@ cmd_copy_data(struct cs_builder *b, uint64_t dst_addr, uint64_t src_addr,
    assert((dst_addr | src_addr | size) % sizeof(uint32_t) == 0);

    /* wait for timestamp writes */
-   cs_wait_slot(b, SB_ID(DEFERRED_SYNC), false);
+   cs_wait_slot(b, SB_ID(DEFERRED_SYNC));

    /* Depending on where this is called from, we could potentially use SR
     * registers or copy with a compute job.
@@ -52,7 +52,7 @@ cmd_copy_data(struct cs_builder *b, uint64_t dst_addr, uint64_t src_addr,
       const struct cs_index reg = cs_scratch_reg_tuple(b, 4, count);

       cs_load_to(b, reg, src_addr_reg, BITFIELD_MASK(count), offset);
-      cs_wait_slot(b, SB_ID(LS), false);
+      cs_wait_slot(b, SB_ID(LS));
       cs_store(b, reg, dst_addr_reg, BITFIELD_MASK(count), offset);

       copy_count -= count;
@@ -64,7 +64,7 @@ cmd_copy_data(struct cs_builder *b, uint64_t dst_addr, uint64_t src_addr,
       size -= offset;
    }

-   cs_wait_slot(b, SB_ID(LS), false);
+   cs_wait_slot(b, SB_ID(LS));
 }

 static struct cs_builder *
@@ -170,7 +170,7 @@ panvk_per_arch(utrace_clone_finish_builder)(struct cs_builder *b)
    cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_NONE,
                    MALI_CS_OTHER_FLUSH_MODE_NONE, flush_id,
                    cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
-   cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
+   cs_wait_slot(b, SB_ID(IMM_FLUSH));

    cs_finish(b);
 }