mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 00:38:48 +02:00
tu/perfetto: Move away from single timeline for all apps
This moves from deprecated stage_id/hw_queue_id to per-context stage_iid/hw_queue_iid, which leads to separate timelines per app. There are several benefits to this: - Different driver versions could be used by different apps and perfetto won't confuse tracepoints. - Tracepoints from different apps may not align perfectly, so previously we got a fair amount of weird vertical ordering of tracepoints. The downside is that info is spread across several timelines multiplied by queues, but I think that's better since it is easier to understand which tracepoints correspond to which app. The changes are mostly copied from radeon/intel perfetto integration. This also fixes app_event emission along the way, previously debug_marker_stage was called _before_ SEQ_INCREMENTAL_STATE_CLEARED. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41105>
This commit is contained in:
parent
117f3cb1fc
commit
5b5bc956df
5 changed files with 179 additions and 125 deletions
|
|
@ -2712,9 +2712,6 @@ tu_device_destroy_mutexes(struct tu_device *device)
|
|||
mtx_destroy(&device->wave_pvtmem_bo.mtx);
|
||||
mtx_destroy(&device->mutex);
|
||||
mtx_destroy(&device->copy_timestamp_cs_pool_mutex);
|
||||
#ifdef HAVE_PERFETTO
|
||||
mtx_destroy(&device->perfetto.pending_clocks_sync_mtx);
|
||||
#endif
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
|
||||
mtx_destroy(&device->scratch_bos[i].construct_mtx);
|
||||
|
||||
|
|
@ -2830,7 +2827,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
mtx_init(&device->copy_timestamp_cs_pool_mutex, mtx_plain);
|
||||
mtx_init(&device->softfloat_mutex, mtx_plain);
|
||||
#ifdef HAVE_PERFETTO
|
||||
mtx_init(&device->perfetto.pending_clocks_sync_mtx, mtx_plain);
|
||||
tu_perfetto_init_state(&device->perfetto);
|
||||
#endif
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
|
||||
mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);
|
||||
|
|
@ -3203,6 +3200,9 @@ fail_queues:
|
|||
vk_free(&device->vk.alloc, device->queues[i]);
|
||||
}
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
tu_perfetto_destroy_state(&device->perfetto);
|
||||
#endif
|
||||
tu_device_destroy_mutexes(device);
|
||||
tu_drm_device_finish(device);
|
||||
vk_device_finish(&device->vk);
|
||||
|
|
@ -4544,13 +4544,10 @@ tu_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer,
|
|||
* buffers. Still, getting the simple case of cmd buffer annotation into
|
||||
* perfetto should prove useful.
|
||||
*/
|
||||
const char *label = pLabelInfo->pLabelName;
|
||||
if (cmd_buffer->state.pass) {
|
||||
trace_start_cmd_buffer_annotation_rp(
|
||||
&cmd_buffer->trace, &cmd_buffer->draw_cs, cmd_buffer, strlen(label), label);
|
||||
trace_start_cmd_buffer_annotation_rp(&cmd_buffer->trace, &cmd_buffer->draw_cs, cmd_buffer);
|
||||
} else {
|
||||
trace_start_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs,
|
||||
cmd_buffer, strlen(label), label);
|
||||
trace_start_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs, cmd_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -4559,11 +4556,16 @@ tu_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer)
|
|||
{
|
||||
VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);
|
||||
|
||||
if (cmd_buffer->state.pass) {
|
||||
trace_end_cmd_buffer_annotation_rp(&cmd_buffer->trace,
|
||||
&cmd_buffer->draw_cs);
|
||||
} else {
|
||||
trace_end_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs);
|
||||
if (cmd_buffer->vk.labels.size > 0) {
|
||||
const VkDebugUtilsLabelEXT *label = util_dynarray_top_ptr(&cmd_buffer->vk.labels, VkDebugUtilsLabelEXT);
|
||||
|
||||
if (cmd_buffer->state.pass) {
|
||||
trace_end_cmd_buffer_annotation_rp(&cmd_buffer->trace, &cmd_buffer->draw_cs, strlen(label->pLabelName),
|
||||
label->pLabelName);
|
||||
} else {
|
||||
trace_end_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs, strlen(label->pLabelName),
|
||||
label->pLabelName);
|
||||
}
|
||||
}
|
||||
|
||||
vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer);
|
||||
|
|
@ -4574,11 +4576,12 @@ tu_SetDebugUtilsObjectNameEXT(
|
|||
VkDevice device,
|
||||
const VkDebugUtilsObjectNameInfoEXT *pNameInfo)
|
||||
{
|
||||
UNUSED VK_FROM_HANDLE(tu_device, dev, device);
|
||||
VkResult result = vk_common_SetDebugUtilsObjectNameEXT(device, pNameInfo);
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
if (result == VK_SUCCESS)
|
||||
tu_perfetto_set_debug_utils_object_name(pNameInfo);
|
||||
tu_perfetto_set_debug_utils_object_name(dev, pNameInfo);
|
||||
#endif
|
||||
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include <perfetto.h>
|
||||
|
||||
#include "util/u_process.h"
|
||||
#include "util/perf/u_perfetto_renderpass.h"
|
||||
|
||||
#include "tu_buffer.h"
|
||||
|
|
@ -30,75 +31,13 @@ tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts);
|
|||
struct u_trace_context *
|
||||
tu_device_get_u_trace(struct tu_device *device);
|
||||
|
||||
/**
|
||||
* Queue-id's
|
||||
*/
|
||||
enum tu_queue_id {
|
||||
BR_HW_QUEUE_ID,
|
||||
BV_HW_QUEUE_ID,
|
||||
|
||||
/* Labels set via VK_EXT_debug_utils are in a separate track due to the
|
||||
* following part of the spec:
|
||||
* "An application may open a debug label region in one command buffer and
|
||||
* close it in another, or otherwise split debug label regions across
|
||||
* multiple command buffers or multiple queue submissions."
|
||||
*
|
||||
* This means labels can start in one renderpass and end in another command
|
||||
* buffer, which breaks our assumption that stages can be modeled as a stack.
|
||||
* While applications aren't expected to use labels in such extreme ways,
|
||||
* even simpler cases can break our assumptions.
|
||||
*
|
||||
* Having annotations in a separate track prevents the main track(s) from
|
||||
* entering an invalid state.
|
||||
*/
|
||||
ANNOTATIONS_QUEUE_ID,
|
||||
};
|
||||
|
||||
/**
|
||||
* Render-stage id's
|
||||
*/
|
||||
enum tu_stage_id {
|
||||
CMD_BUFFER_STAGE_ID,
|
||||
CMD_BUFFER_ANNOTATION_STAGE_ID,
|
||||
RENDER_PASS_STAGE_ID,
|
||||
SECONDARY_CMD_BUFFER_STAGE_ID,
|
||||
CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
|
||||
BINNING_STAGE_ID,
|
||||
CONCURRENT_BINNING_STAGE_ID,
|
||||
CONCURRENT_BINNING_BARRIER_STAGE_ID,
|
||||
GMEM_STAGE_ID,
|
||||
BYPASS_STAGE_ID,
|
||||
BLIT_STAGE_ID,
|
||||
DRAW_STAGE_ID,
|
||||
COMPUTE_STAGE_ID,
|
||||
CLEAR_SYSMEM_STAGE_ID,
|
||||
CLEAR_GMEM_STAGE_ID,
|
||||
GENERIC_CLEAR_STAGE_ID,
|
||||
GMEM_LOAD_STAGE_ID,
|
||||
GMEM_STORE_STAGE_ID,
|
||||
SYSMEM_RESOLVE_STAGE_ID,
|
||||
CUSTOM_RESOLVE_STAGE_ID,
|
||||
CLEAR_COLOR_IMAGE_STAGE_ID,
|
||||
CLEAR_DEPTH_STENCIL_IMAGE_STAGE_ID,
|
||||
COPY_BUFFER_TO_IMAGE_STAGE_ID,
|
||||
COPY_IMAGE_TO_BUFFER_STAGE_ID,
|
||||
COPY_IMAGE_STAGE_ID,
|
||||
RESOLVE_IMAGE_STAGE_ID,
|
||||
FILL_BUFFER_STAGE_ID,
|
||||
COPY_BUFFER_STAGE_ID,
|
||||
UPDATE_BUFFER_STAGE_ID,
|
||||
SLOW_CLEAR_LRZ_STAGE_ID,
|
||||
DISABLE_LRZ_STAGE_ID,
|
||||
// TODO add the rest from fd_stage_id
|
||||
};
|
||||
|
||||
static const struct {
|
||||
const char *name;
|
||||
const char *desc;
|
||||
} queues[] = {
|
||||
[ANNOTATIONS_QUEUE_ID] = {"Annotations", "Annotations Queue"},
|
||||
[BR_HW_QUEUE_ID] = {"GPU Queue 0", "Default Adreno Hardware Queue"},
|
||||
[BV_HW_QUEUE_ID] = {"GPU Queue 1", "Adreno Bin Visibility Queue"},
|
||||
[ANNOTATIONS_QUEUE_ID] = {"Annotations", "Annotations Queue"},
|
||||
};
|
||||
|
||||
static const struct {
|
||||
|
|
@ -140,6 +79,14 @@ static const struct {
|
|||
};
|
||||
|
||||
static uint32_t gpu_clock_id;
|
||||
|
||||
static uint64_t
|
||||
get_iid()
|
||||
{
|
||||
static uint64_t iid = 1;
|
||||
return p_atomic_inc_return(&iid);
|
||||
}
|
||||
|
||||
struct TuRenderpassTraits : public perfetto::DefaultDataSourceTraits {
|
||||
using IncrementalStateType = MesaRenderpassIncrementalState;
|
||||
};
|
||||
|
|
@ -189,7 +136,8 @@ emit_sync_timestamp(struct tu_perfetto_clocks &clocks)
|
|||
}
|
||||
|
||||
static void
|
||||
setup_incremental_state(TuRenderpassDataSource::TraceContext &ctx)
|
||||
setup_incremental_state(TuRenderpassDataSource::TraceContext &ctx,
|
||||
struct tu_device *dev)
|
||||
{
|
||||
auto state = ctx.GetIncrementalState();
|
||||
if (!state->was_cleared)
|
||||
|
|
@ -201,32 +149,69 @@ setup_incremental_state(TuRenderpassDataSource::TraceContext &ctx)
|
|||
|
||||
auto packet = ctx.NewTracePacket();
|
||||
|
||||
packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
|
||||
packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
|
||||
/* This must be set before interned data is sent. */
|
||||
packet->set_sequence_flags(perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
|
||||
|
||||
packet->set_timestamp(0);
|
||||
auto interned_data = packet->set_interned_data();
|
||||
|
||||
auto event = packet->set_gpu_render_stage_event();
|
||||
event->set_gpu_id(0);
|
||||
|
||||
auto spec = event->set_specifications();
|
||||
{
|
||||
auto desc = interned_data->add_graphics_contexts();
|
||||
desc->set_iid(dev->perfetto.context_iid);
|
||||
desc->set_pid(getpid());
|
||||
desc->set_api(perfetto::protos::pbzero::InternedGraphicsContext_Api::VULKAN);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
|
||||
auto desc = spec->add_hw_queue();
|
||||
char name[100];
|
||||
auto desc = interned_data->add_gpu_specifications();
|
||||
|
||||
desc->set_name(queues[i].name);
|
||||
snprintf(name, sizeof(name), "%.10s-%02u-%s",
|
||||
util_get_process_name(), i, queues[i].name);
|
||||
desc->set_iid(dev->perfetto.queue_iids[i]);
|
||||
desc->set_name(name);
|
||||
desc->set_description(queues[i].desc);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
|
||||
auto desc = spec->add_stage();
|
||||
auto desc = interned_data->add_gpu_specifications();
|
||||
|
||||
desc->set_iid(dev->perfetto.stage_iids[i]);
|
||||
desc->set_name(stages[i].name);
|
||||
if (stages[i].desc)
|
||||
desc->set_description(stages[i].desc);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
tu_perfetto_init_state(struct tu_perfetto_state *state)
|
||||
{
|
||||
mtx_init(&state->pending_clocks_sync_mtx, mtx_plain);
|
||||
|
||||
state->context_iid = get_iid();
|
||||
state->event_id = 0;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(state->queue_iids); i++)
|
||||
state->queue_iids[i] = get_iid();
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(state->stage_iids); i++)
|
||||
state->stage_iids[i] = get_iid();
|
||||
}
|
||||
|
||||
void
|
||||
tu_perfetto_destroy_state(struct tu_perfetto_state *state)
|
||||
{
|
||||
mtx_destroy(&state->pending_clocks_sync_mtx);
|
||||
}
|
||||
|
||||
static void
|
||||
stage_cleanup(struct tu_perfetto_stage *stage)
|
||||
{
|
||||
free((void *) stage->payload);
|
||||
|
||||
stage->payload = nullptr;
|
||||
stage->start_payload_function = nullptr;
|
||||
}
|
||||
|
||||
static struct tu_perfetto_stage *
|
||||
stage_push(struct tu_perfetto_stage_stack *stack)
|
||||
{
|
||||
|
|
@ -268,7 +253,6 @@ static void
|
|||
stage_start(struct tu_device *dev,
|
||||
uint64_t ts_ns,
|
||||
enum tu_stage_id stage_id,
|
||||
const char *app_event,
|
||||
const void *payload = nullptr,
|
||||
size_t payload_size = 0,
|
||||
const void *indirect = nullptr,
|
||||
|
|
@ -293,22 +277,15 @@ stage_start(struct tu_device *dev,
|
|||
|
||||
*stage = (struct tu_perfetto_stage) {
|
||||
.stage_id = stage_id,
|
||||
.stage_iid = 0,
|
||||
.start_ts = ts_ns,
|
||||
.payload = payload,
|
||||
.start_payload_function = (void *) payload_as_extra,
|
||||
};
|
||||
|
||||
if (app_event) {
|
||||
TuRenderpassDataSource::Trace([=](auto tctx) {
|
||||
stage->stage_iid =
|
||||
tctx.GetDataSourceLocked()->debug_marker_stage(tctx, app_event);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage_id,
|
||||
const char *app_event,
|
||||
const void *flush_data,
|
||||
const void* payload = nullptr,
|
||||
const void *indirect = nullptr,
|
||||
|
|
@ -327,12 +304,15 @@ stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage_id,
|
|||
|
||||
uint64_t duration = ts_ns - stage->start_ts;
|
||||
/* Zero duration can only happen when tracepoints did not happen on GPU. */
|
||||
if (duration == 0)
|
||||
if (duration == 0) {
|
||||
stage_cleanup(stage);
|
||||
return;
|
||||
}
|
||||
|
||||
if (stage->stage_id != stage_id) {
|
||||
PERFETTO_ELOG("stage %d ended while stage %d is expected",
|
||||
stage_id, stage->stage_id);
|
||||
stage_cleanup(stage);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -366,7 +346,11 @@ stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage_id,
|
|||
}
|
||||
|
||||
TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
|
||||
setup_incremental_state(tctx);
|
||||
setup_incremental_state(tctx, dev);
|
||||
|
||||
uint64_t stage_iid = app_event ?
|
||||
tctx.GetDataSourceLocked()->debug_marker_stage(tctx, app_event) :
|
||||
state->stage_iids[stage->stage_id];
|
||||
|
||||
auto packet = tctx.NewTracePacket();
|
||||
|
||||
|
|
@ -376,26 +360,24 @@ stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage_id,
|
|||
packet->set_timestamp_clock_id(gpu_clock_id);
|
||||
|
||||
auto event = packet->set_gpu_render_stage_event();
|
||||
event->set_event_id(0); // ???
|
||||
event->set_hw_queue_id(queue_id);
|
||||
event->set_event_id(state->event_id++);
|
||||
event->set_hw_queue_iid(state->queue_iids[queue_id]);
|
||||
event->set_duration(ts_ns - stage->start_ts);
|
||||
if (stage->stage_iid)
|
||||
event->set_stage_iid(stage->stage_iid);
|
||||
else
|
||||
event->set_stage_id(stage->stage_id);
|
||||
event->set_context((uintptr_t) dev);
|
||||
event->set_stage_iid(stage_iid);
|
||||
event->set_context(state->context_iid);
|
||||
event->set_submission_id(submission_id);
|
||||
|
||||
if (stage->payload) {
|
||||
if (stage->start_payload_function)
|
||||
((trace_payload_as_extra_func) stage->start_payload_function)(
|
||||
event, stage->payload, nullptr);
|
||||
free((void *)stage->payload);
|
||||
}
|
||||
|
||||
if (payload && payload_as_extra)
|
||||
payload_as_extra(event, payload, indirect);
|
||||
});
|
||||
|
||||
stage_cleanup(stage);
|
||||
}
|
||||
|
||||
class TuMemoryDataSource : public perfetto::DataSource<TuMemoryDataSource> {
|
||||
|
|
@ -591,7 +573,7 @@ tu_perfetto_end_submit(struct tu_queue *queue,
|
|||
const void *indirect_data) \
|
||||
{ \
|
||||
stage_start( \
|
||||
dev, ts_ns, stage_id, NULL, payload, sizeof(*payload), indirect_data, \
|
||||
dev, ts_ns, stage_id, payload, sizeof(*payload), indirect_data, \
|
||||
(trace_payload_as_extra_func) &trace_payload_as_extra_start_##event_name); \
|
||||
} \
|
||||
\
|
||||
|
|
@ -601,7 +583,7 @@ tu_perfetto_end_submit(struct tu_queue *queue,
|
|||
const void *indirect_data) \
|
||||
{ \
|
||||
stage_end( \
|
||||
dev, ts_ns, stage_id, flush_data, payload, indirect_data, \
|
||||
dev, ts_ns, stage_id, NULL, flush_data, payload, indirect_data, \
|
||||
(trace_payload_as_extra_func) &trace_payload_as_extra_end_##event_name); \
|
||||
}
|
||||
|
||||
|
|
@ -647,7 +629,7 @@ tu_perfetto_start_cmd_buffer_annotation(
|
|||
const void *indirect_data)
|
||||
{
|
||||
/* No extra func necessary, the only arg is in the end payload.*/
|
||||
stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, payload->str, payload,
|
||||
stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, payload,
|
||||
sizeof(*payload), NULL);
|
||||
}
|
||||
|
||||
|
|
@ -663,7 +645,7 @@ tu_perfetto_end_cmd_buffer_annotation(
|
|||
/* Pass the payload string as the app_event, which will appear right on the
|
||||
* event block, rather than as metadata inside.
|
||||
*/
|
||||
stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, flush_data,
|
||||
stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, payload->str, flush_data,
|
||||
payload, NULL);
|
||||
}
|
||||
|
||||
|
|
@ -678,7 +660,7 @@ tu_perfetto_start_cmd_buffer_annotation_rp(
|
|||
{
|
||||
/* No extra func necessary, the only arg is in the end payload.*/
|
||||
stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
|
||||
payload->str, payload, sizeof(*payload), NULL);
|
||||
payload, sizeof(*payload), NULL);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -694,7 +676,7 @@ tu_perfetto_end_cmd_buffer_annotation_rp(
|
|||
* event block, rather than as metadata inside.
|
||||
*/
|
||||
stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
|
||||
flush_data, payload, NULL);
|
||||
payload->str, flush_data, payload, NULL);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -773,22 +755,22 @@ tu_perfetto_log_destroy_image(struct tu_device *dev, struct tu_image *image)
|
|||
|
||||
|
||||
void
|
||||
tu_perfetto_set_debug_utils_object_name(const VkDebugUtilsObjectNameInfoEXT *pNameInfo)
|
||||
tu_perfetto_set_debug_utils_object_name(struct tu_device *dev, const VkDebugUtilsObjectNameInfoEXT *pNameInfo)
|
||||
{
|
||||
TuRenderpassDataSource::Trace([=](auto tctx) {
|
||||
/* Do we need this for SEQ_INCREMENTAL_STATE_CLEARED for the object name to stick? */
|
||||
setup_incremental_state(tctx);
|
||||
setup_incremental_state(tctx, dev);
|
||||
|
||||
tctx.GetDataSourceLocked()->SetDebugUtilsObjectNameEXT(tctx, pNameInfo);
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
tu_perfetto_refresh_debug_utils_object_name(const struct vk_object_base *object)
|
||||
tu_perfetto_refresh_debug_utils_object_name(struct tu_device *dev, const struct vk_object_base *object)
|
||||
{
|
||||
TuRenderpassDataSource::Trace([=](auto tctx) {
|
||||
/* Do we need this for SEQ_INCREMENTAL_STATE_CLEARED for the object name to stick? */
|
||||
setup_incremental_state(tctx);
|
||||
setup_incremental_state(tctx, dev);
|
||||
|
||||
tctx.GetDataSourceLocked()->RefreshSetDebugUtilsObjectNameEXT(tctx, object);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -20,16 +20,76 @@ extern "C" {
|
|||
|
||||
#define TU_PERFETTO_MAX_STACK_DEPTH 8
|
||||
|
||||
/**
|
||||
* Queue-id's
|
||||
*/
|
||||
enum tu_queue_id {
|
||||
/* Labels set via VK_EXT_debug_utils are in a separate track due to the
|
||||
* following part of the spec:
|
||||
* "An application may open a debug label region in one command buffer and
|
||||
* close it in another, or otherwise split debug label regions across
|
||||
* multiple command buffers or multiple queue submissions."
|
||||
*
|
||||
* This means labels can start in one renderpass and end in another command
|
||||
* buffer, which breaks our assumption that stages can be modeled as a stack.
|
||||
* While applications aren't expected to use labels in such extreme ways,
|
||||
* even simpler cases can break our assumptions.
|
||||
*
|
||||
* Having annotations in a separate track prevents the main track(s) from
|
||||
* entering an invalid state.
|
||||
*/
|
||||
ANNOTATIONS_QUEUE_ID,
|
||||
BR_HW_QUEUE_ID,
|
||||
BV_HW_QUEUE_ID,
|
||||
|
||||
TU_QUEUE_ID_COUNT,
|
||||
};
|
||||
|
||||
/**
|
||||
* Render-stage id's
|
||||
*/
|
||||
enum tu_stage_id {
|
||||
CMD_BUFFER_STAGE_ID,
|
||||
CMD_BUFFER_ANNOTATION_STAGE_ID,
|
||||
RENDER_PASS_STAGE_ID,
|
||||
SECONDARY_CMD_BUFFER_STAGE_ID,
|
||||
CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
|
||||
BINNING_STAGE_ID,
|
||||
CONCURRENT_BINNING_STAGE_ID,
|
||||
CONCURRENT_BINNING_BARRIER_STAGE_ID,
|
||||
GMEM_STAGE_ID,
|
||||
BYPASS_STAGE_ID,
|
||||
BLIT_STAGE_ID,
|
||||
DRAW_STAGE_ID,
|
||||
COMPUTE_STAGE_ID,
|
||||
CLEAR_SYSMEM_STAGE_ID,
|
||||
CLEAR_GMEM_STAGE_ID,
|
||||
GENERIC_CLEAR_STAGE_ID,
|
||||
GMEM_LOAD_STAGE_ID,
|
||||
GMEM_STORE_STAGE_ID,
|
||||
SYSMEM_RESOLVE_STAGE_ID,
|
||||
CUSTOM_RESOLVE_STAGE_ID,
|
||||
CLEAR_COLOR_IMAGE_STAGE_ID,
|
||||
CLEAR_DEPTH_STENCIL_IMAGE_STAGE_ID,
|
||||
COPY_BUFFER_TO_IMAGE_STAGE_ID,
|
||||
COPY_IMAGE_TO_BUFFER_STAGE_ID,
|
||||
COPY_IMAGE_STAGE_ID,
|
||||
RESOLVE_IMAGE_STAGE_ID,
|
||||
FILL_BUFFER_STAGE_ID,
|
||||
COPY_BUFFER_STAGE_ID,
|
||||
UPDATE_BUFFER_STAGE_ID,
|
||||
SLOW_CLEAR_LRZ_STAGE_ID,
|
||||
DISABLE_LRZ_STAGE_ID,
|
||||
|
||||
TU_STAGE_ID_COUNT,
|
||||
};
|
||||
|
||||
struct tu_device;
|
||||
struct tu_queue;
|
||||
struct tu_u_trace_submission_data;
|
||||
|
||||
struct tu_perfetto_stage {
|
||||
int stage_id;
|
||||
/* dynamically allocated stage iid, for app_events. 0 if stage_id should be
|
||||
* used instead.
|
||||
*/
|
||||
uint64_t stage_iid;
|
||||
enum tu_stage_id stage_id;
|
||||
uint64_t start_ts;
|
||||
const void* payload;
|
||||
void* start_payload_function;
|
||||
|
|
@ -52,6 +112,11 @@ struct tu_perfetto_state {
|
|||
struct tu_perfetto_stage_stack annotations_stack;
|
||||
struct tu_perfetto_stage_stack render_stack;
|
||||
|
||||
uint64_t context_iid;
|
||||
uint64_t queue_iids[TU_QUEUE_ID_COUNT];
|
||||
uint64_t stage_iids[TU_STAGE_ID_COUNT];
|
||||
uint64_t event_id;
|
||||
|
||||
bool has_pending_clocks_sync;
|
||||
mtx_t pending_clocks_sync_mtx;
|
||||
struct tu_perfetto_clocks pending_clocks_sync;
|
||||
|
|
@ -66,6 +131,8 @@ struct tu_perfetto_state {
|
|||
};
|
||||
|
||||
void tu_perfetto_init(void);
|
||||
void tu_perfetto_init_state(struct tu_perfetto_state *state);
|
||||
void tu_perfetto_destroy_state(struct tu_perfetto_state *state);
|
||||
|
||||
uint64_t
|
||||
tu_perfetto_begin_submit();
|
||||
|
|
@ -86,10 +153,12 @@ void tu_perfetto_log_destroy_image(struct tu_device *dev, struct tu_image *image
|
|||
|
||||
void
|
||||
tu_perfetto_set_debug_utils_object_name(
|
||||
struct tu_device *dev,
|
||||
const VkDebugUtilsObjectNameInfoEXT *pNameInfo);
|
||||
|
||||
void
|
||||
tu_perfetto_refresh_debug_utils_object_name(
|
||||
struct tu_device *dev,
|
||||
const struct vk_object_base *object);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -548,7 +548,7 @@ queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
|
|||
#ifdef HAVE_PERFETTO
|
||||
if (u_trace_should_process(&device->trace_context)) {
|
||||
for (int i = 0; i < vk_submit->command_buffer_count; i++)
|
||||
tu_perfetto_refresh_debug_utils_object_name(
|
||||
tu_perfetto_refresh_debug_utils_object_name(device,
|
||||
&vk_submit->command_buffers[i]->base);
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -266,9 +266,9 @@ begin_end_tp('compute_indirect',
|
|||
# Annotations for Cmd(Begin|End)DebugUtilsLabelEXT
|
||||
for suffix in ["", "_rp"]:
|
||||
begin_end_tp('cmd_buffer_annotation' + suffix,
|
||||
args=[Arg(type='unsigned', var='len'),
|
||||
Arg(type='str', var='str', c_format='%s', length_arg='len + 1', copy_func='strncpy'),],
|
||||
tp_struct=[Arg(type='uint8_t', name='dummy', var='0'),])
|
||||
end_args=[Arg(type='unsigned', var='len'),
|
||||
Arg(type='str', var='str', c_format='%s', length_arg='len + 1', copy_func='strncpy'),],
|
||||
end_tp_struct=[Arg(type='uint8_t', name='dummy', var='0'),])
|
||||
|
||||
utrace_generate(cpath=args.utrace_src,
|
||||
hpath=args.utrace_hdr,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue