From 2c8d77b94a838dd369248a03a83e49cca0a9683c Mon Sep 17 00:00:00 2001 From: Lars-Ivar Hesselberg Simonsen Date: Thu, 31 Jul 2025 17:30:59 +0200 Subject: [PATCH] panvk/utrace: Add sync32/64_add support Add the ability to capture sync32/64_add during tracing. Reviewed-by: Christoph Pillmayer Part-of: --- src/panfrost/vulkan/csf/panvk_instr.h | 20 ++++++++ src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c | 4 +- .../vulkan/csf/panvk_vX_cmd_dispatch.c | 10 ++-- src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c | 20 ++++---- .../vulkan/csf/panvk_vX_cmd_precomp.c | 11 +++-- src/panfrost/vulkan/csf/panvk_vX_instr.c | 46 +++++++++++++++++++ src/panfrost/vulkan/panvk_tracepoints.py | 36 +++++++++++++++ src/panfrost/vulkan/panvk_utrace_perfetto.cc | 3 ++ src/panfrost/vulkan/panvk_utrace_perfetto.h | 1 + 9 files changed, 133 insertions(+), 18 deletions(-) diff --git a/src/panfrost/vulkan/csf/panvk_instr.h b/src/panfrost/vulkan/csf/panvk_instr.h index 68bd8c6505c..d0ed4070c46 100644 --- a/src/panfrost/vulkan/csf/panvk_instr.h +++ b/src/panfrost/vulkan/csf/panvk_instr.h @@ -14,6 +14,8 @@ enum panvk_instr_work_type { PANVK_INSTR_WORK_TYPE_DISPATCH, PANVK_INSTR_WORK_TYPE_DISPATCH_INDIRECT, PANVK_INSTR_WORK_TYPE_BARRIER, + PANVK_INSTR_WORK_TYPE_SYNC32_ADD, + PANVK_INSTR_WORK_TYPE_SYNC64_ADD, PANVK_INSTR_WORK_TYPE_SYNC32_WAIT, PANVK_INSTR_WORK_TYPE_SYNC64_WAIT, }; @@ -86,6 +88,24 @@ void panvk_per_arch(panvk_instr_end_work_async)( struct cs_async_op ts_async_op); #define PANVK_INSTR_SYNC_OPS(__cnt_width) \ + static inline void panvk_instr_sync##__cnt_width##_add( \ + struct panvk_cmd_buffer *cmdbuf, enum panvk_subqueue_id id, \ + bool propagate_error, enum mali_cs_sync_scope scope, \ + struct cs_index val, struct cs_index addr, struct cs_async_op async) \ + { \ + struct cs_builder *b = panvk_get_cs_builder(cmdbuf, id); \ + panvk_per_arch(panvk_instr_begin_work)( \ + id, cmdbuf, PANVK_INSTR_WORK_TYPE_SYNC##__cnt_width##_ADD); \ + cs_sync##__cnt_width##_add(b, propagate_error, scope, val, addr, async); \ + struct panvk_instr_end_args instr_info = { \ + .sync = {.addr_regs = addr, .val_regs = val}, \ + }; \ + async.signal_slot = 0; \ + panvk_per_arch(panvk_instr_end_work_async)( \ + id, cmdbuf, PANVK_INSTR_WORK_TYPE_SYNC##__cnt_width##_ADD, \ + &instr_info, async); \ + } \ + \ static inline void panvk_instr_sync##__cnt_width##_wait( \ struct panvk_cmd_buffer *cmdbuf, enum panvk_subqueue_id id, \ bool reject_error, enum mali_cs_condition cond, struct cs_index ref, \ diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c index 90115c3fcc7..8c8a130f2e2 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c @@ -638,8 +638,8 @@ panvk_per_arch(emit_barrier)(struct panvk_cmd_buffer *cmdbuf, offsetof(struct panvk_cs_subqueue_context, syncobjs)); cs_add64(b, sync_addr, sync_addr, sizeof(struct panvk_cs_sync64) * i); cs_move64_to(b, add_val, 1); - cs_sync64_add(b, true, MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, - cs_now()); + panvk_instr_sync64_add(cmdbuf, i, true, MALI_CS_SYNC_SCOPE_CSG, + add_val, sync_addr, cs_now()); ++cs_state->relative_sync_point; } } diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c index 1abc3bd9165..eca3b1f4421 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c @@ -313,8 +313,9 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) cs_add64(b, sync_addr, sync_addr, PANVK_SUBQUEUE_COMPUTE * sizeof(struct panvk_cs_sync64)); cs_move64_to(b, add_val, 1); - cs_sync64_add(b, true, MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, - cs_defer_indirect()); + panvk_instr_sync64_add(cmdbuf, PANVK_SUBQUEUE_COMPUTE, true, + MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, + cs_defer_indirect()); #else struct cs_index sync_addr = cs_scratch_reg64(b, 0); struct cs_index iter_sb = cs_scratch_reg32(b, 2); @@ -332,8 +333,9 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) cs_match(b, iter_sb, cmp_scratch) { #define CASE(x) \ cs_case(b, SB_ITER(x)) { \ - cs_sync64_add(b, true, MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, \ - cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC))); \ + panvk_instr_sync64_add(cmdbuf, PANVK_SUBQUEUE_COMPUTE, true, \ + MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, \ + cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC))); \ } CASE(0) diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index 68c024add2c..3076db520d4 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -2950,8 +2950,9 @@ flush_tiling(struct panvk_cmd_buffer *cmdbuf) cs_move64_to(b, add_val, 1); cs_heap_operation(b, MALI_CS_HEAP_OPERATION_VERTEX_TILER_COMPLETED, cs_defer_indirect()); - cs_sync64_add(b, true, MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, - cs_defer_indirect()); + panvk_instr_sync64_add(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER, true, + MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, + cs_defer_indirect()); #else struct cs_index sync_addr = cs_scratch_reg64(b, 0); struct cs_index iter_sb = cs_scratch_reg32(b, 2); @@ -2969,8 +2970,9 @@ flush_tiling(struct panvk_cmd_buffer *cmdbuf) cs_case(b, SB_ITER(x)) { \ cs_heap_operation(b, MALI_CS_HEAP_OPERATION_VERTEX_TILER_COMPLETED, \ cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC))); \ - cs_sync64_add(b, true, MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, \ - cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC))); \ + panvk_instr_sync64_add(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER, true, \ + MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, \ + cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC))); \ } CASE(0) @@ -3331,8 +3333,9 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf) } } - cs_sync64_add(b, true, MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, - cs_defer_indirect()); + panvk_instr_sync64_add(cmdbuf, PANVK_SUBQUEUE_FRAGMENT, true, + MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, + cs_defer_indirect()); #else struct cs_index iter_sb = cs_scratch_reg32(b, 2); struct cs_index cmp_scratch = cs_scratch_reg32(b, 3); @@ -3392,8 +3395,9 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf) cs_defer(SB_MASK(DEFERRED_FLUSH), SB_ID(DEFERRED_SYNC))); \ } \ } \ - cs_sync64_add(b, true, MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, \ - async); \ + panvk_instr_sync64_add(cmdbuf, PANVK_SUBQUEUE_FRAGMENT, true, \ + MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, \ + async); \ } CASE(0) diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c index 10ff557fcc2..34449c4080c 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c @@ -10,6 +10,7 @@ #include "panvk_cmd_alloc.h" #include "panvk_cmd_buffer.h" #include "panvk_cmd_precomp.h" +#include "panvk_instr.h" #include "panvk_macros.h" #include "panvk_mempool.h" #include "panvk_precomp_cache.h" @@ -125,8 +126,9 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx, cs_add64(b, sync_addr, sync_addr, PANVK_SUBQUEUE_COMPUTE * sizeof(struct panvk_cs_sync64)); cs_move64_to(b, add_val, 1); - cs_sync64_add(b, true, MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, - cs_defer_indirect()); + panvk_instr_sync64_add(cmdbuf, PANVK_SUBQUEUE_COMPUTE, true, + MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, + cs_defer_indirect()); #else struct cs_index sync_addr = cs_scratch_reg64(b, 0); struct cs_index iter_sb = cs_scratch_reg32(b, 2); @@ -144,8 +146,9 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx, cs_match(b, iter_sb, cmp_scratch) { #define CASE(x) \ cs_case(b, SB_ITER(x)) { \ - cs_sync64_add(b, true, MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, \ - cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC))); \ + panvk_instr_sync64_add(cmdbuf, PANVK_SUBQUEUE_COMPUTE, true, \ + MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, \ + cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC))); \ } CASE(0) diff --git a/src/panfrost/vulkan/csf/panvk_vX_instr.c b/src/panfrost/vulkan/csf/panvk_vX_instr.c index 051c31a274e..9ce68823565 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_instr.c +++ b/src/panfrost/vulkan/csf/panvk_vX_instr.c @@ -63,6 +63,40 @@ panvk_instr_end_dispatch_indirect(enum panvk_subqueue_id id, }); } +static void +panvk_instr_end_sync32_add(enum panvk_subqueue_id id, + struct panvk_utrace_cs_info *cs_info, + const struct panvk_instr_end_args *const args) +{ + assert(args->sync.val_regs.size == 1); + trace_end_sync32_add(&cs_info->cmdbuf->utrace.uts[id], cs_info, + (struct u_trace_address){ + .bo = (void *)PANVK_UTRACE_CAPTURE_REGISTERS, + .offset = args->sync.addr_regs.reg, + }, + (struct u_trace_address){ + .bo = (void *)PANVK_UTRACE_CAPTURE_REGISTERS, + .offset = args->sync.val_regs.reg, + }); +} + +static void +panvk_instr_end_sync64_add(enum panvk_subqueue_id id, + struct panvk_utrace_cs_info *cs_info, + const struct panvk_instr_end_args *const args) +{ + assert(args->sync.val_regs.size == 2); + trace_end_sync64_add(&cs_info->cmdbuf->utrace.uts[id], cs_info, + (struct u_trace_address){ + .bo = (void *)PANVK_UTRACE_CAPTURE_REGISTERS, + .offset = args->sync.addr_regs.reg, + }, + (struct u_trace_address){ + .bo = (void *)PANVK_UTRACE_CAPTURE_REGISTERS, + .offset = args->sync.val_regs.reg, + }); +} + static void panvk_instr_end_sync32_wait(enum panvk_subqueue_id id, struct panvk_utrace_cs_info *cs_info, @@ -131,6 +165,12 @@ panvk_per_arch(panvk_instr_begin_work)(enum panvk_subqueue_id id, case PANVK_INSTR_WORK_TYPE_BARRIER: trace_begin_barrier(&cmdbuf->utrace.uts[id], &cs_info); break; + case PANVK_INSTR_WORK_TYPE_SYNC32_ADD: + trace_begin_sync32_add(&cmdbuf->utrace.uts[id], &cs_info); + break; + case PANVK_INSTR_WORK_TYPE_SYNC64_ADD: + trace_begin_sync64_add(&cmdbuf->utrace.uts[id], &cs_info); + break; case PANVK_INSTR_WORK_TYPE_SYNC32_WAIT: trace_begin_sync32_wait(&cmdbuf->utrace.uts[id], &cs_info); break; @@ -183,6 +223,12 @@ panvk_per_arch(panvk_instr_end_work_async)( case PANVK_INSTR_WORK_TYPE_BARRIER: panvk_instr_end_barrier(id, &cs_info, args); break; + case PANVK_INSTR_WORK_TYPE_SYNC32_ADD: + panvk_instr_end_sync32_add(id, &cs_info, args); + break; + case PANVK_INSTR_WORK_TYPE_SYNC64_ADD: + panvk_instr_end_sync64_add(id, &cs_info, args); + break; case PANVK_INSTR_WORK_TYPE_SYNC32_WAIT: panvk_instr_end_sync32_wait(id, &cs_info, args); break; diff --git a/src/panfrost/vulkan/panvk_tracepoints.py b/src/panfrost/vulkan/panvk_tracepoints.py index c59ea61670b..2147c0e4df9 100644 --- a/src/panfrost/vulkan/panvk_tracepoints.py +++ b/src/panfrost/vulkan/panvk_tracepoints.py @@ -222,6 +222,42 @@ def define_tracepoints(): # low-level tracepoints for CS commands + begin_end_tp( + 'sync32_add', + args=[ + Arg( + type='uint64_t', + var='sync_addr', + is_indirect=True, + c_format='0x%" PRIx64 "' + ), + Arg( + type='uint32_t', + var='sync_val', + is_indirect=True, + c_format='%u', + ), + ], + ) + + begin_end_tp( + 'sync64_add', + args=[ + Arg( + type='uint64_t', + var='sync_addr', + is_indirect=True, + c_format='0x%" PRIx64 "' + ), + Arg( + type='uint64_t', + var='sync_val', + is_indirect=True, + c_format='%" PRIu64 "', + ), + ], + ) + begin_end_tp( 'sync32_wait', args=[ diff --git a/src/panfrost/vulkan/panvk_utrace_perfetto.cc b/src/panfrost/vulkan/panvk_utrace_perfetto.cc index d5c52452ce9..3210e0a9ee9 100644 --- a/src/panfrost/vulkan/panvk_utrace_perfetto.cc +++ b/src/panfrost/vulkan/panvk_utrace_perfetto.cc @@ -43,6 +43,7 @@ get_stage_name(enum panvk_utrace_perfetto_stage stage) CASE(RENDER); CASE(DISPATCH); CASE(BARRIER); + CASE(SYNC_ADD); CASE(SYNC_WAIT); #undef CASE default: @@ -249,6 +250,8 @@ PANVK_UTRACE_PERFETTO_PROCESS_EVENT(render, RENDER) PANVK_UTRACE_PERFETTO_PROCESS_EVENT(dispatch, DISPATCH) PANVK_UTRACE_PERFETTO_PROCESS_EVENT(dispatch_indirect, DISPATCH) PANVK_UTRACE_PERFETTO_PROCESS_EVENT(barrier, BARRIER) +PANVK_UTRACE_PERFETTO_PROCESS_EVENT(sync32_add, SYNC_ADD) +PANVK_UTRACE_PERFETTO_PROCESS_EVENT(sync64_add, SYNC_ADD) PANVK_UTRACE_PERFETTO_PROCESS_EVENT(sync32_wait, SYNC_WAIT) PANVK_UTRACE_PERFETTO_PROCESS_EVENT(sync64_wait, SYNC_WAIT) diff --git a/src/panfrost/vulkan/panvk_utrace_perfetto.h b/src/panfrost/vulkan/panvk_utrace_perfetto.h index 67b7699896a..702271de08d 100644 --- a/src/panfrost/vulkan/panvk_utrace_perfetto.h +++ b/src/panfrost/vulkan/panvk_utrace_perfetto.h @@ -24,6 +24,7 @@ enum panvk_utrace_perfetto_stage { PANVK_UTRACE_PERFETTO_STAGE_RENDER, PANVK_UTRACE_PERFETTO_STAGE_DISPATCH, PANVK_UTRACE_PERFETTO_STAGE_BARRIER, + PANVK_UTRACE_PERFETTO_STAGE_SYNC_ADD, PANVK_UTRACE_PERFETTO_STAGE_SYNC_WAIT, PANVK_UTRACE_PERFETTO_STAGE_COUNT,