panvk/utrace: Add flush_cache support

Also pulls flush_cache information out of the barrier tracepoint.

Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36519>
This commit is contained in:
Lars-Ivar Hesselberg Simonsen 2025-08-05 13:26:08 +02:00 committed by Marge Bot
parent 2c8d77b94a
commit 856daf4ce2
6 changed files with 76 additions and 29 deletions

View file

@ -14,6 +14,7 @@ enum panvk_instr_work_type {
PANVK_INSTR_WORK_TYPE_DISPATCH,
PANVK_INSTR_WORK_TYPE_DISPATCH_INDIRECT,
PANVK_INSTR_WORK_TYPE_BARRIER,
PANVK_INSTR_WORK_TYPE_FLUSH_CACHE,
PANVK_INSTR_WORK_TYPE_SYNC32_ADD,
PANVK_INSTR_WORK_TYPE_SYNC64_ADD,
PANVK_INSTR_WORK_TYPE_SYNC32_WAIT,
@ -26,9 +27,6 @@ struct panvk_instr_end_args {
struct {
uint8_t wait_sb_mask;
uint8_t wait_subqueue_mask;
uint8_t l2;
uint8_t lsc;
uint8_t other;
} barrier;
struct {
@ -56,6 +54,12 @@ struct panvk_instr_end_args {
uint64_t buffer_gpu;
} dispatch_indirect;
struct {
uint8_t l2;
uint8_t lsc;
uint8_t other;
} flush_cache;
struct {
struct cs_index addr_regs;
struct cs_index val_regs;

View file

@ -150,11 +150,21 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
* pushed back to main memory after the CPU has written new stuff there. */
struct cs_index flush_id = cs_scratch_reg32(b, 0);
panvk_per_arch(panvk_instr_begin_work)(subqueue, cmdbuf,
PANVK_INSTR_WORK_TYPE_FLUSH_CACHE);
cs_move32_to(b, flush_id, 0);
cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_CLEAN,
MALI_CS_OTHER_FLUSH_MODE_NONE, flush_id,
cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
cs_wait_slot(b, SB_ID(IMM_FLUSH));
struct panvk_instr_end_args instr_info_flush = {
.flush_cache = {
.l2 = MALI_CS_FLUSH_MODE_CLEAN,
.lsc = MALI_CS_FLUSH_MODE_CLEAN,
.other = MALI_CS_OTHER_FLUSH_MODE_NONE,
}};
panvk_per_arch(panvk_instr_end_work)(
subqueue, cmdbuf, PANVK_INSTR_WORK_TYPE_FLUSH_CACHE, &instr_info_flush);
/* If this is a secondary command buffer, we don't poison the reg file to
* preserve the render pass context. We also don't poison the reg file if the
@ -172,11 +182,11 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
}
}
struct panvk_instr_end_args instr_info = {.cmdbuf = {
.flags = cmdbuf->flags,
}};
struct panvk_instr_end_args instr_info_cmdbuf = {.cmdbuf = {
.flags = cmdbuf->flags,
}};
panvk_per_arch(panvk_instr_end_work)(
subqueue, cmdbuf, PANVK_INSTR_WORK_TYPE_CMDBUF, &instr_info);
subqueue, cmdbuf, PANVK_INSTR_WORK_TYPE_CMDBUF, &instr_info_cmdbuf);
cs_finish(&cmdbuf->state.cs[subqueue].builder);
}
@ -621,10 +631,20 @@ panvk_per_arch(emit_barrier)(struct panvk_cmd_buffer *cmdbuf,
if (!panvk_cache_flush_is_nop(&cache_flush)) {
struct cs_index flush_id = cs_scratch_reg32(b, 0);
panvk_per_arch(panvk_instr_begin_work)(
i, cmdbuf, PANVK_INSTR_WORK_TYPE_FLUSH_CACHE);
cs_move32_to(b, flush_id, 0);
cs_flush_caches(b, cache_flush.l2, cache_flush.lsc, cache_flush.others,
flush_id, cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
cs_wait_slot(b, SB_ID(IMM_FLUSH));
struct panvk_instr_end_args instr_info_flush = {
.flush_cache = {
.l2 = cache_flush.l2,
.lsc = cache_flush.lsc,
.other = cache_flush.others,
}};
panvk_per_arch(panvk_instr_end_work)(
i, cmdbuf, PANVK_INSTR_WORK_TYPE_FLUSH_CACHE, &instr_info_flush);
}
/* If no one waits on us, there's no point signaling the sync object. */
@ -661,11 +681,10 @@ panvk_per_arch(emit_barrier)(struct panvk_cmd_buffer *cmdbuf,
u_foreach_bit(i, utrace_subqueue_mask) {
struct panvk_instr_end_args info = {
.barrier = {.wait_sb_mask = deps.src[i].wait_sb_mask,
.wait_subqueue_mask = deps.dst[i].wait_subqueue_mask,
.l2 = deps.src[i].cache_flush.l2,
.lsc = deps.src[i].cache_flush.lsc,
.other = deps.src[i].cache_flush.others}};
.barrier = {
.wait_sb_mask = deps.src[i].wait_sb_mask,
.wait_subqueue_mask = deps.dst[i].wait_subqueue_mask,
}};
panvk_per_arch(panvk_instr_end_work)(
i, cmdbuf, PANVK_INSTR_WORK_TYPE_BARRIER, &info);
}

View file

@ -17,8 +17,7 @@ panvk_instr_end_barrier(enum panvk_subqueue_id id,
{
trace_end_barrier(&cs_info->cmdbuf->utrace.uts[id], cs_info,
args->barrier.wait_sb_mask,
args->barrier.wait_subqueue_mask, args->barrier.l2,
args->barrier.lsc, args->barrier.other);
args->barrier.wait_subqueue_mask);
}
static void
@ -63,6 +62,16 @@ panvk_instr_end_dispatch_indirect(enum panvk_subqueue_id id,
});
}
/* Emits the end-of-work tracepoint for a cache flush on the utrace entry
 * selected by `id`, forwarding the l2, lsc and other values stored in
 * args->flush_cache. */
static void
panvk_instr_end_flush_cache(enum panvk_subqueue_id id,
                            struct panvk_utrace_cs_info *cs_info,
                            const struct panvk_instr_end_args *const args)
{
   const uint8_t l2_mode = args->flush_cache.l2;
   const uint8_t lsc_mode = args->flush_cache.lsc;
   const uint8_t other_mode = args->flush_cache.other;

   trace_end_flush_cache(&cs_info->cmdbuf->utrace.uts[id], cs_info, l2_mode,
                         lsc_mode, other_mode);
}
static void
panvk_instr_end_sync32_add(enum panvk_subqueue_id id,
struct panvk_utrace_cs_info *cs_info,
@ -165,6 +174,9 @@ panvk_per_arch(panvk_instr_begin_work)(enum panvk_subqueue_id id,
case PANVK_INSTR_WORK_TYPE_BARRIER:
trace_begin_barrier(&cmdbuf->utrace.uts[id], &cs_info);
break;
case PANVK_INSTR_WORK_TYPE_FLUSH_CACHE:
trace_begin_flush_cache(&cmdbuf->utrace.uts[id], &cs_info);
break;
case PANVK_INSTR_WORK_TYPE_SYNC32_ADD:
trace_begin_sync32_add(&cmdbuf->utrace.uts[id], &cs_info);
break;
@ -223,6 +235,9 @@ panvk_per_arch(panvk_instr_end_work_async)(
case PANVK_INSTR_WORK_TYPE_BARRIER:
panvk_instr_end_barrier(id, &cs_info, args);
break;
case PANVK_INSTR_WORK_TYPE_FLUSH_CACHE:
panvk_instr_end_flush_cache(id, &cs_info, args);
break;
case PANVK_INSTR_WORK_TYPE_SYNC32_ADD:
panvk_instr_end_sync32_add(id, &cs_info, args);
break;

View file

@ -202,21 +202,6 @@ def define_tracepoints():
var='sync_wait',
c_format='0x%x',
),
Arg(
type='uint8_t',
var='l2',
c_format='%u',
),
Arg(
type='uint8_t',
var='lsc',
c_format='%u',
),
Arg(
type='uint8_t',
var='other',
c_format='%u',
),
],
)
@ -304,6 +289,27 @@ def define_tracepoints():
],
)
# flush_cache tracepoint: takes the l2, lsc and other values, each declared
# as a uint8_t argument formatted with '%u'.
begin_end_tp(
    'flush_cache',
    args=[
        Arg(type='uint8_t', var=field, c_format='%u')
        for field in ('l2', 'lsc', 'other')
    ],
)
def generate_code():
utrace_generate(
cpath=args.utrace_src,

View file

@ -43,6 +43,7 @@ get_stage_name(enum panvk_utrace_perfetto_stage stage)
CASE(RENDER);
CASE(DISPATCH);
CASE(BARRIER);
CASE(FLUSH_CACHE);
CASE(SYNC_ADD);
CASE(SYNC_WAIT);
#undef CASE
@ -250,6 +251,7 @@ PANVK_UTRACE_PERFETTO_PROCESS_EVENT(render, RENDER)
PANVK_UTRACE_PERFETTO_PROCESS_EVENT(dispatch, DISPATCH)
PANVK_UTRACE_PERFETTO_PROCESS_EVENT(dispatch_indirect, DISPATCH)
PANVK_UTRACE_PERFETTO_PROCESS_EVENT(barrier, BARRIER)
PANVK_UTRACE_PERFETTO_PROCESS_EVENT(flush_cache, FLUSH_CACHE)
PANVK_UTRACE_PERFETTO_PROCESS_EVENT(sync32_add, SYNC_ADD)
PANVK_UTRACE_PERFETTO_PROCESS_EVENT(sync64_add, SYNC_ADD)
PANVK_UTRACE_PERFETTO_PROCESS_EVENT(sync32_wait, SYNC_WAIT)

View file

@ -24,6 +24,7 @@ enum panvk_utrace_perfetto_stage {
PANVK_UTRACE_PERFETTO_STAGE_RENDER,
PANVK_UTRACE_PERFETTO_STAGE_DISPATCH,
PANVK_UTRACE_PERFETTO_STAGE_BARRIER,
PANVK_UTRACE_PERFETTO_STAGE_FLUSH_CACHE,
PANVK_UTRACE_PERFETTO_STAGE_SYNC_ADD,
PANVK_UTRACE_PERFETTO_STAGE_SYNC_WAIT,