diff --git a/src/panfrost/vulkan/csf/panvk_vX_queue.c b/src/panfrost/vulkan/csf/panvk_vX_queue.c
index da4d81c46a6..e195fb1e313 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_queue.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_queue.c
@@ -739,6 +739,7 @@ struct panvk_queue_submit {
       uint32_t queue_mask;
       enum panvk_subqueue_id first_subqueue;
       enum panvk_subqueue_id last_subqueue;
+      bool needs_clone;
       const struct u_trace *last_ut;
 
       struct panvk_utrace_flush_data *data_storage;
@@ -792,13 +793,19 @@ panvk_queue_submit_init_storage(
             submit->qsubmit_count++;
 
             struct u_trace *ut = &cmdbuf->utrace.uts[j];
-            if (submit->process_utrace && u_trace_has_points(ut) &&
-                (cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
+            if (submit->process_utrace && u_trace_has_points(ut)) {
                submit->utrace.queue_mask |= BITFIELD_BIT(j);
                if (submit->utrace.first_subqueue == PANVK_SUBQUEUE_COUNT)
                   submit->utrace.first_subqueue = j;
                submit->utrace.last_subqueue = j;
                submit->utrace.last_ut = ut;
+
+               if (!(cmdbuf->flags &
+                     VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
+                  /* we will follow the user cs with a timestamp copy cs */
+                  submit->qsubmit_count++;
+                  submit->utrace.needs_clone = true;
+               }
             }
          }
       }
@@ -861,6 +868,8 @@ static void
 panvk_queue_submit_init_utrace(struct panvk_queue_submit *submit,
                                const struct vk_queue_submit *vk_submit)
 {
+   struct panvk_device *dev = submit->dev;
+
    if (!submit->utrace.queue_mask)
       return;
 
@@ -882,6 +891,11 @@ panvk_queue_submit_init_utrace(struct panvk_queue_submit *submit,
          .wait_value = wait ? submit->queue->utrace.next_value : 0,
       };
    }
+
+   if (submit->utrace.needs_clone) {
+      struct panvk_pool *clone_pool = &submit->utrace.data_storage->clone_pool;
+      panvk_per_arch(utrace_clone_init_pool)(clone_pool, dev);
+   }
 }
 
 static void
@@ -943,11 +957,38 @@ panvk_queue_submit_init_cmdbufs(struct panvk_queue_submit *submit,
          u_foreach_bit(j, submit->utrace.queue_mask) {
             struct u_trace *ut = &cmdbuf->utrace.uts[j];
 
-            if (!u_trace_has_points(ut) ||
-                !(cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
+            if (!u_trace_has_points(ut))
                continue;
 
             const bool free_data = ut == submit->utrace.last_ut;
+
+            struct u_trace clone_ut;
+            if (!(cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
+               u_trace_init(&clone_ut, &dev->utrace.utctx);
+
+               struct panvk_pool *clone_pool =
+                  &submit->utrace.data_storage->clone_pool;
+               struct cs_builder clone_builder;
+               panvk_per_arch(utrace_clone_init_builder)(&clone_builder,
+                                                         clone_pool);
+
+               u_trace_clone_append(
+                  u_trace_begin_iterator(ut), u_trace_end_iterator(ut), &clone_ut,
+                  &clone_builder, panvk_per_arch(utrace_copy_buffer));
+
+               panvk_per_arch(utrace_clone_finish_builder)(&clone_builder);
+
+               submit->qsubmits[submit->qsubmit_count++] =
+                  (struct drm_panthor_queue_submit){
+                     .queue_index = j,
+                     .stream_size = cs_root_chunk_size(&clone_builder),
+                     .stream_addr = cs_root_chunk_gpu_addr(&clone_builder),
+                     .latest_flush = panthor_kmod_get_flush_id(dev->kmod.dev),
+                  };
+
+               ut = &clone_ut;
+            }
+
             u_trace_flush(ut, submit->utrace.data[j], dev->vk.current_frame,
                           free_data);
          }
diff --git a/src/panfrost/vulkan/csf/panvk_vX_utrace.c b/src/panfrost/vulkan/csf/panvk_vX_utrace.c
index edab7ba0ce2..fa1b5e7223f 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_utrace.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_utrace.c
@@ -114,3 +114,69 @@ panvk_per_arch(utrace_copy_buffer)(struct u_trace_context *utctx,
 
    cmd_copy_data(b, dst_addr, src_addr, size_B);
 }
+
+/* Initializes the pool that backs the command streams built to copy utrace
+ * timestamps. It is configured with a 64 KiB slab size and owns its BOs.
+ */
+void
+panvk_per_arch(utrace_clone_init_pool)(struct panvk_pool *pool,
+                                       struct panvk_device *dev)
+{
+   const struct panvk_pool_properties pool_props = {
+      .slab_size = 64 * 1024,
+      .label = "utrace clone pool",
+      .owns_bos = true,
+   };
+   panvk_pool_init(pool, dev, NULL, &pool_props);
+}
+
+/* Chunk allocator handed to the CS builder: returns a 4 KiB, 64-byte aligned
+ * chunk taken from the clone pool passed as cookie.
+ */
+static struct cs_buffer
+alloc_clone_buffer(void *cookie)
+{
+   struct panvk_pool *pool = cookie;
+   const uint32_t size = 4 * 1024;
+   const uint32_t alignment = 64;
+
+   struct panfrost_ptr ptr =
+      pan_pool_alloc_aligned(&pool->base, size, alignment);
+
+   return (struct cs_buffer){
+      .cpu = ptr.cpu,
+      .gpu = ptr.gpu,
+      /* capacity is counted in 64-bit instructions, not in bytes */
+      .capacity = size / sizeof(uint64_t),
+   };
+}
+
+/* Sets up a CS builder that allocates its chunks from the given clone pool. */
+void
+panvk_per_arch(utrace_clone_init_builder)(struct cs_builder *b,
+                                          struct panvk_pool *pool)
+{
+   const struct cs_builder_conf builder_conf = {
+      .nr_registers = 96,
+      .nr_kernel_registers = 4,
+      .alloc_buffer = alloc_clone_buffer,
+      .cookie = pool,
+   };
+   cs_builder_init(b, &builder_conf, (struct cs_buffer){0});
+}
+
+/* Ends the clone command stream: emits a cache flush in CLEAN mode, waits on
+ * the IMM_FLUSH scoreboard slot, then finishes the builder.
+ */
+void
+panvk_per_arch(utrace_clone_finish_builder)(struct cs_builder *b)
+{
+   const struct cs_index flush_id = cs_scratch_reg32(b, 0);
+
+   cs_move32_to(b, flush_id, 0);
+   cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_NONE, false,
+                   flush_id, cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
+   cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
+
+   cs_finish(b);
+}
diff --git a/src/panfrost/vulkan/panvk_utrace.c b/src/panfrost/vulkan/panvk_utrace.c
index c5fb6f2dfd4..0c566782da9 100644
--- a/src/panfrost/vulkan/panvk_utrace.c
+++ b/src/panfrost/vulkan/panvk_utrace.c
@@ -75,5 +75,8 @@ panvk_utrace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
 {
    struct panvk_utrace_flush_data *data = flush_data;
 
+   if (data->clone_pool.dev)
+      panvk_pool_cleanup(&data->clone_pool);
+
    free(data);
 }
diff --git a/src/panfrost/vulkan/panvk_utrace.h b/src/panfrost/vulkan/panvk_utrace.h
index 492c45102a6..0e20f75342b 100644
--- a/src/panfrost/vulkan/panvk_utrace.h
+++ b/src/panfrost/vulkan/panvk_utrace.h
@@ -9,6 +9,7 @@
 #include "util/perf/u_trace.h"
 
 #include "panvk_macros.h"
+#include "panvk_mempool.h"
 
 struct panvk_device;
 struct vk_sync;
@@ -18,6 +19,8 @@ struct panvk_utrace_flush_data {
 
    struct vk_sync *sync;
    uint64_t wait_value;
+
+   struct panvk_pool clone_pool;
 };
 
 void *panvk_utrace_create_buffer(struct u_trace_context *utctx,
@@ -43,6 +46,14 @@ void panvk_per_arch(utrace_copy_buffer)(struct u_trace_context *utctx,
                                         uint64_t from_offset, void *ts_to,
                                         uint64_t to_offset, uint64_t size_B);
 
+struct cs_builder;
+
+void panvk_per_arch(utrace_clone_init_pool)(struct panvk_pool *pool,
+                                            struct panvk_device *dev);
+void panvk_per_arch(utrace_clone_init_builder)(struct cs_builder *b,
+                                               struct panvk_pool *pool);
+void panvk_per_arch(utrace_clone_finish_builder)(struct cs_builder *b);
+
 #else /* PAN_ARCH >= 10 */
 
 static inline void