panvk/csf: flush and process trace events for all cmdbufs

When a cmdbuf does not have VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
call u_trace_clone_append to clone trace events and to copy timestamps.

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32360>
This commit is contained in:
Chia-I Wu 2024-12-05 14:03:58 -08:00 committed by Marge Bot
parent e9d2a3c2c6
commit 3096cf2a5d
4 changed files with 114 additions and 4 deletions

View file

@ -739,6 +739,7 @@ struct panvk_queue_submit {
uint32_t queue_mask;
enum panvk_subqueue_id first_subqueue;
enum panvk_subqueue_id last_subqueue;
bool needs_clone;
const struct u_trace *last_ut;
struct panvk_utrace_flush_data *data_storage;
@ -792,13 +793,19 @@ panvk_queue_submit_init_storage(
submit->qsubmit_count++;
struct u_trace *ut = &cmdbuf->utrace.uts[j];
if (submit->process_utrace && u_trace_has_points(ut) &&
(cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
if (submit->process_utrace && u_trace_has_points(ut)) {
submit->utrace.queue_mask |= BITFIELD_BIT(j);
if (submit->utrace.first_subqueue == PANVK_SUBQUEUE_COUNT)
submit->utrace.first_subqueue = j;
submit->utrace.last_subqueue = j;
submit->utrace.last_ut = ut;
if (!(cmdbuf->flags &
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
/* we will follow the user cs with a timestamp copy cs */
submit->qsubmit_count++;
submit->utrace.needs_clone = true;
}
}
}
}
@ -861,6 +868,8 @@ static void
panvk_queue_submit_init_utrace(struct panvk_queue_submit *submit,
const struct vk_queue_submit *vk_submit)
{
struct panvk_device *dev = submit->dev;
if (!submit->utrace.queue_mask)
return;
@ -882,6 +891,11 @@ panvk_queue_submit_init_utrace(struct panvk_queue_submit *submit,
.wait_value = wait ? submit->queue->utrace.next_value : 0,
};
}
if (submit->utrace.needs_clone) {
struct panvk_pool *clone_pool = &submit->utrace.data_storage->clone_pool;
panvk_per_arch(utrace_clone_init_pool)(clone_pool, dev);
}
}
static void
@ -943,11 +957,38 @@ panvk_queue_submit_init_cmdbufs(struct panvk_queue_submit *submit,
u_foreach_bit(j, submit->utrace.queue_mask) {
struct u_trace *ut = &cmdbuf->utrace.uts[j];
if (!u_trace_has_points(ut) ||
!(cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
if (!u_trace_has_points(ut))
continue;
const bool free_data = ut == submit->utrace.last_ut;
struct u_trace clone_ut;
if (!(cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
u_trace_init(&clone_ut, &dev->utrace.utctx);
struct panvk_pool *clone_pool =
&submit->utrace.data_storage->clone_pool;
struct cs_builder clone_builder;
panvk_per_arch(utrace_clone_init_builder)(&clone_builder,
clone_pool);
u_trace_clone_append(
u_trace_begin_iterator(ut), u_trace_end_iterator(ut), &clone_ut,
&clone_builder, panvk_per_arch(utrace_copy_buffer));
panvk_per_arch(utrace_clone_finish_builder)(&clone_builder);
submit->qsubmits[submit->qsubmit_count++] =
(struct drm_panthor_queue_submit){
.queue_index = j,
.stream_size = cs_root_chunk_size(&clone_builder),
.stream_addr = cs_root_chunk_gpu_addr(&clone_builder),
.latest_flush = panthor_kmod_get_flush_id(dev->kmod.dev),
};
ut = &clone_ut;
}
u_trace_flush(ut, submit->utrace.data[j], dev->vk.current_frame,
free_data);
}

View file

@ -114,3 +114,58 @@ panvk_per_arch(utrace_copy_buffer)(struct u_trace_context *utctx,
cmd_copy_data(b, dst_addr, src_addr, size_B);
}
void
panvk_per_arch(utrace_clone_init_pool)(struct panvk_pool *pool,
struct panvk_device *dev)
{
const struct panvk_pool_properties pool_props = {
.slab_size = 64 * 1024,
.label = "utrace clone pool",
.owns_bos = true,
};
panvk_pool_init(pool, dev, NULL, &pool_props);
}
/*
 * Buffer allocation callback for the utrace clone command stream builder.
 *
 * `cookie` is the panvk_pool registered as the builder's cookie in
 * utrace_clone_init_builder(). Each call carves a 4 KiB, 64-byte aligned
 * allocation out of that pool and hands its CPU/GPU addresses back to the
 * builder.
 *
 * The allocation result is forwarded as-is; a failed allocation is not
 * detected here.
 */
static struct cs_buffer
alloc_clone_buffer(void *cookie)
{
   struct panvk_pool *pool = cookie;
   const uint32_t size_B = 4 * 1024;
   const uint32_t alignment = 64;

   struct panfrost_ptr ptr =
      pan_pool_alloc_aligned(&pool->base, size_B, alignment);

   return (struct cs_buffer){
      .cpu = ptr.cpu,
      .gpu = ptr.gpu,
      /* cs_buffer.capacity is expressed in 64-bit instructions, not bytes
       * (see cs_builder.h); reporting the byte size would let the builder
       * write past the end of the allocation.
       */
      .capacity = size_B / sizeof(uint64_t),
   };
}
void
panvk_per_arch(utrace_clone_init_builder)(struct cs_builder *b,
struct panvk_pool *pool)
{
const struct cs_builder_conf builder_conf = {
.nr_registers = 96,
.nr_kernel_registers = 4,
.alloc_buffer = alloc_clone_buffer,
.cookie = pool,
};
cs_builder_init(b, &builder_conf, (struct cs_buffer){0});
}
void
panvk_per_arch(utrace_clone_finish_builder)(struct cs_builder *b)
{
const struct cs_index flush_id = cs_scratch_reg32(b, 0);
cs_move32_to(b, flush_id, 0);
cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_NONE, false,
flush_id, cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
cs_finish(b);
}

View file

@ -75,5 +75,8 @@ panvk_utrace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
{
struct panvk_utrace_flush_data *data = flush_data;
if (data->clone_pool.dev)
panvk_pool_cleanup(&data->clone_pool);
free(data);
}

View file

@ -9,6 +9,7 @@
#include "util/perf/u_trace.h"
#include "panvk_macros.h"
#include "panvk_mempool.h"
struct panvk_device;
struct vk_sync;
@ -18,6 +19,8 @@ struct panvk_utrace_flush_data {
struct vk_sync *sync;
uint64_t wait_value;
struct panvk_pool clone_pool;
};
void *panvk_utrace_create_buffer(struct u_trace_context *utctx,
@ -43,6 +46,14 @@ void panvk_per_arch(utrace_copy_buffer)(struct u_trace_context *utctx,
uint64_t from_offset, void *ts_to,
uint64_t to_offset, uint64_t size_B);
/* Forward declaration: only pointers to cs_builder are used below. */
struct cs_builder;
/* Initialize `pool` on `dev` as the pool used for cloning utrace events
 * (64 KiB slabs, pool owns its BOs).
 */
void panvk_per_arch(utrace_clone_init_pool)(struct panvk_pool *pool,
struct panvk_device *dev);
/* Initialize `b` so that its buffers are allocated from `pool`. */
void panvk_per_arch(utrace_clone_init_builder)(struct cs_builder *b,
struct panvk_pool *pool);
/* Emit a cache flush plus a wait on it into `b`, then finish the builder. */
void panvk_per_arch(utrace_clone_finish_builder)(struct cs_builder *b);
#else /* PAN_ARCH >= 10 */
static inline void