panvk/csf: flush and process trace events for one-time cmdbufs

Flush and process trace events on queue submit when tracing is enabled
and timestamp_frequency is valid.  Tracing can be enabled with, for
example, MESA_GPU_TRACES=print.

panvk_utrace_flush_data is allocated on queue submit and is freed after
all trace events associated with the queue submit are processed.  It is
used to synchronize GPU timestamp writes and trace event processing.
It also specifies the subqueue the trace events belong to.

For the moment, cmdbufs without
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT are silently ignored.

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32360>
This commit is contained in:
Chia-I Wu 2024-11-24 18:59:41 -08:00 committed by Marge Bot
parent b9631adb0e
commit e9d2a3c2c6
4 changed files with 127 additions and 2 deletions

View file

@ -722,6 +722,7 @@ struct panvk_queue_submit {
struct panvk_device *dev;
struct panvk_queue *queue;
bool process_utrace;
bool force_sync;
uint32_t used_queue_mask;
@ -733,6 +734,16 @@ struct panvk_queue_submit {
struct drm_panthor_queue_submit *qsubmits;
struct drm_panthor_sync_op *wait_ops;
struct drm_panthor_sync_op *signal_ops;
struct {
uint32_t queue_mask;
enum panvk_subqueue_id first_subqueue;
enum panvk_subqueue_id last_subqueue;
const struct u_trace *last_ut;
struct panvk_utrace_flush_data *data_storage;
struct panvk_utrace_flush_data *data[PANVK_SUBQUEUE_COUNT];
} utrace;
};
struct panvk_queue_submit_stack_storage {
@ -753,6 +764,10 @@ panvk_queue_submit_init(struct panvk_queue_submit *submit,
.queue = container_of(vk_queue, struct panvk_queue, vk),
};
submit->process_utrace =
u_trace_should_process(&submit->dev->utrace.utctx) &&
submit->phys_dev->kmod.props.timestamp_frequency;
submit->force_sync =
submit->instance->debug_flags & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC);
}
@ -762,6 +777,7 @@ panvk_queue_submit_init_storage(
struct panvk_queue_submit *submit, const struct vk_queue_submit *vk_submit,
struct panvk_queue_submit_stack_storage *stack_storage)
{
submit->utrace.first_subqueue = PANVK_SUBQUEUE_COUNT;
for (uint32_t i = 0; i < vk_submit->command_buffer_count; i++) {
struct panvk_cmd_buffer *cmdbuf = container_of(
vk_submit->command_buffers[i], struct panvk_cmd_buffer, vk);
@ -774,6 +790,16 @@ panvk_queue_submit_init_storage(
submit->used_queue_mask |= BITFIELD_BIT(j);
submit->qsubmit_count++;
struct u_trace *ut = &cmdbuf->utrace.uts[j];
if (submit->process_utrace && u_trace_has_points(ut) &&
(cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
submit->utrace.queue_mask |= BITFIELD_BIT(j);
if (submit->utrace.first_subqueue == PANVK_SUBQUEUE_COUNT)
submit->utrace.first_subqueue = j;
submit->utrace.last_subqueue = j;
submit->utrace.last_ut = ut;
}
}
}
@ -784,7 +810,8 @@ panvk_queue_submit_init_storage(
uint32_t syncop_count = 0;
submit->needs_waits = vk_submit->wait_count > 0;
submit->needs_signals = vk_submit->signal_count > 0 || submit->force_sync;
submit->needs_signals = vk_submit->signal_count > 0 || submit->force_sync ||
submit->utrace.queue_mask;
/* We add sync-only queue submits to place our wait/signal operations. */
if (submit->needs_waits) {
@ -808,6 +835,12 @@ panvk_queue_submit_init_storage(
/* reset so that we can initialize submit->qsubmits incrementally */
submit->qsubmit_count = 0;
if (submit->utrace.queue_mask) {
submit->utrace.data_storage =
malloc(sizeof(*submit->utrace.data_storage) *
util_bitcount(submit->utrace.queue_mask));
}
}
static void
@ -819,6 +852,36 @@ panvk_queue_submit_cleanup_storage(
free(submit->qsubmits);
if (submit->wait_ops != stack_storage->syncops)
free(submit->wait_ops);
/* either no utrace flush data or the data has been transferred to u_trace */
assert(!submit->utrace.data_storage);
}
static void
panvk_queue_submit_init_utrace(struct panvk_queue_submit *submit,
const struct vk_queue_submit *vk_submit)
{
if (!submit->utrace.queue_mask)
return;
/* u_trace_context processes trace events in order. We want to make sure
* it waits for the timestamp writes before processing the first event and
* it can free the flush data after processing the last event.
*/
struct panvk_utrace_flush_data *next = submit->utrace.data_storage;
submit->utrace.data[submit->utrace.last_subqueue] = next++;
u_foreach_bit(i, submit->utrace.queue_mask) {
if (i != submit->utrace.last_subqueue)
submit->utrace.data[i] = next++;
const bool wait = i == submit->utrace.first_subqueue;
*submit->utrace.data[i] = (struct panvk_utrace_flush_data){
.subqueue = i,
.sync = wait ? submit->queue->utrace.sync : NULL,
.wait_value = wait ? submit->queue->utrace.next_value : 0,
};
}
}
static void
@ -857,6 +920,8 @@ static void
panvk_queue_submit_init_cmdbufs(struct panvk_queue_submit *submit,
const struct vk_queue_submit *vk_submit)
{
struct panvk_device *dev = submit->dev;
for (uint32_t i = 0; i < vk_submit->command_buffer_count; i++) {
struct panvk_cmd_buffer *cmdbuf = container_of(
vk_submit->command_buffers[i], struct panvk_cmd_buffer, vk);
@ -874,7 +939,22 @@ panvk_queue_submit_init_cmdbufs(struct panvk_queue_submit *submit,
.latest_flush = cmdbuf->flush_id,
};
}
u_foreach_bit(j, submit->utrace.queue_mask) {
struct u_trace *ut = &cmdbuf->utrace.uts[j];
if (!u_trace_has_points(ut) ||
!(cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
continue;
const bool free_data = ut == submit->utrace.last_ut;
u_trace_flush(ut, submit->utrace.data[j], dev->vk.current_frame,
free_data);
}
}
/* we've transferred the data ownership to utrace, if any */
submit->utrace.data_storage = NULL;
}
static void
@ -978,6 +1058,18 @@ panvk_queue_submit_process_signals(struct panvk_queue_submit *submit,
queue->syncobj_handle, 0, 0);
}
if (submit->utrace.queue_mask) {
const struct vk_drm_syncobj *syncobj =
vk_sync_as_drm_syncobj(queue->utrace.sync);
drmSyncobjTransfer(dev->vk.drm_fd, syncobj->syncobj,
queue->utrace.next_value++, queue->syncobj_handle, 0,
0);
/* process flushed events after the syncobj is set up */
u_trace_context_process(&dev->utrace.utctx, false);
}
drmSyncobjReset(dev->vk.drm_fd, &queue->syncobj_handle, 1);
}
@ -1067,6 +1159,7 @@ panvk_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *vk_submit)
panvk_queue_submit_init(&submit, vk_queue);
panvk_queue_submit_init_storage(&submit, vk_submit, &stack_storage);
panvk_queue_submit_init_utrace(&submit, vk_submit);
panvk_queue_submit_init_waits(&submit, vk_submit);
panvk_queue_submit_init_cmdbufs(&submit, vk_submit);
panvk_queue_submit_init_signals(&submit, vk_submit);

View file

@ -91,7 +91,7 @@ panvk_per_arch(utrace_context_init)(struct panvk_device *dev)
u_trace_context_init(&dev->utrace.utctx, NULL, sizeof(uint64_t), 0,
panvk_utrace_create_buffer, panvk_utrace_delete_buffer,
panvk_utrace_record_ts, panvk_utrace_read_ts, NULL,
NULL, NULL);
NULL, panvk_utrace_delete_flush_data);
}
void

View file

@ -6,10 +6,12 @@
#include "panvk_utrace.h"
#include "kmod/pan_kmod.h"
#include "util/log.h"
#include "util/timespec.h"
#include "panvk_device.h"
#include "panvk_physical_device.h"
#include "panvk_priv_bo.h"
#include "vk_sync.h"
static struct panvk_device *
to_dev(struct u_trace_context *utctx)
@ -46,9 +48,20 @@ panvk_utrace_read_ts(struct u_trace_context *utctx, void *timestamps,
to_panvk_physical_device(dev->vk.physical);
const struct pan_kmod_dev_props *props = &pdev->kmod.props;
const struct panvk_priv_bo *bo = timestamps;
struct panvk_utrace_flush_data *data = flush_data;
assert(props->timestamp_frequency);
/* wait for the submit */
if (data->sync) {
if (vk_sync_wait(&dev->vk, data->sync, data->wait_value,
VK_SYNC_WAIT_COMPLETE, UINT64_MAX) != VK_SUCCESS)
mesa_logw("failed to wait for utrace timestamps");
data->sync = NULL;
data->wait_value = 0;
}
const uint64_t *ts_ptr = bo->addr.host + offset_B;
uint64_t ts = *ts_ptr;
if (ts != U_TRACE_NO_TIMESTAMP)
@ -56,3 +69,11 @@ panvk_utrace_read_ts(struct u_trace_context *utctx, void *timestamps,
return ts;
}
/**
 * Flush-data destructor: passes flush_data straight to free().
 *
 * utctx is unused.
 */
void
panvk_utrace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
{
   (void)utctx;
   free(flush_data);
}

View file

@ -11,6 +11,14 @@
#include "panvk_macros.h"
struct panvk_device;
struct vk_sync;
/* Flush data attached to the trace events of one subqueue on queue submit. */
struct panvk_utrace_flush_data {
/* index of the subqueue the trace events belong to */
uint32_t subqueue;
/* sync object to wait on before reading timestamps; NULL when no wait is
 * needed
 */
struct vk_sync *sync;
/* value passed to vk_sync_wait() together with sync */
uint64_t wait_value;
};
void *panvk_utrace_create_buffer(struct u_trace_context *utctx,
uint64_t size_B);
@ -20,6 +28,9 @@ void panvk_utrace_delete_buffer(struct u_trace_context *utctx, void *buffer);
uint64_t panvk_utrace_read_ts(struct u_trace_context *utctx, void *timestamps,
uint64_t offset_B, void *flush_data);
/* Frees the flush data pointed to by flush_data. */
void panvk_utrace_delete_flush_data(struct u_trace_context *utctx,
void *flush_data);
#ifdef PAN_ARCH
#if PAN_ARCH >= 10