mesa/src/intel/ds/intel_driver_ds.cc
Sami Kyöstilä b936d638ab intel/ds: Update to Perfetto API v28.0
Perfetto v28.0 changes the naming scheme of generated enum constants.
Update our code to match and roll the version of Perfetto forward
accordingly.

No functional changes.

Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18056>
2022-08-19 18:09:43 +00:00

587 lines
19 KiB
C++

/*
* Copyright © 2021 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <stdio.h>
#include <stdarg.h>
#include "common/intel_gem.h"
#include "perf/intel_perf.h"
#include "util/hash_table.h"
#include "util/u_process.h"
#include "intel_driver_ds.h"
#include "intel_pps_priv.h"
#include "intel_tracepoints.h"
#ifdef HAVE_PERFETTO
#include "util/u_perfetto.h"
#include "intel_tracepoints_perfetto.h"
/* Just naming stages */
static const struct {
const char *name;
/* Tells us if a given stage is pipelined. This is used to build stacks of
* pipelined elements so that the perfetto UI doesn't get confused by elements
* ending out of order.
*/
bool pipelined;
/* The perfetto UI requires that there is a parent-child relationship
* within a row of elements. Which means that all children elements must
* end within the lifespan of their parent.
*
* Some elements like stalls and command buffers follow that relationship,
* but not all. This tells us in which UI row the elements should live.
*/
enum intel_ds_queue_stage draw_stage;
} intel_queue_stage_desc[INTEL_DS_QUEUE_STAGE_N_STAGES] = {
/* Order must match the enum! */
{
"cmd-buffer",
false,
INTEL_DS_QUEUE_STAGE_CMD_BUFFER,
},
{
"stall",
false,
INTEL_DS_QUEUE_STAGE_STALL,
},
{
"compute",
true,
INTEL_DS_QUEUE_STAGE_COMPUTE,
},
{
"render-pass",
true,
INTEL_DS_QUEUE_STAGE_RENDER_PASS,
},
{
"blorp",
true,
INTEL_DS_QUEUE_STAGE_BLORP,
},
{
"draw",
true,
INTEL_DS_QUEUE_STAGE_DRAW,
},
};
struct IntelRenderpassIncrementalState {
bool was_cleared = true;
};
struct IntelRenderpassTraits : public perfetto::DefaultDataSourceTraits {
using IncrementalStateType = IntelRenderpassIncrementalState;
};
class IntelRenderpassDataSource : public perfetto::DataSource<IntelRenderpassDataSource,
IntelRenderpassTraits> {
public:
void OnSetup(const SetupArgs &) override
{
// Use this callback to apply any custom configuration to your data source
// based on the TraceConfig in SetupArgs.
}
void OnStart(const StartArgs &) override
{
// This notification can be used to initialize the GPU driver, enable
// counters, etc. StartArgs will contains the DataSourceDescriptor,
// which can be extended.
u_trace_perfetto_start();
PERFETTO_LOG("Tracing started");
}
void OnStop(const StopArgs &) override
{
PERFETTO_LOG("Tracing stopped");
// Undo any initialization done in OnStart.
u_trace_perfetto_stop();
// TODO we should perhaps block until queued traces are flushed?
Trace([](IntelRenderpassDataSource::TraceContext ctx) {
auto packet = ctx.NewTracePacket();
packet->Finalize();
ctx.Flush();
});
}
};
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(IntelRenderpassDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(IntelRenderpassDataSource);
using perfetto::protos::pbzero::InternedGpuRenderStageSpecification_RenderStageCategory;
InternedGpuRenderStageSpecification_RenderStageCategory
i915_engine_class_to_category(enum drm_i915_gem_engine_class engine_class)
{
switch (engine_class) {
case I915_ENGINE_CLASS_RENDER:
return InternedGpuRenderStageSpecification_RenderStageCategory::GRAPHICS;
default:
return InternedGpuRenderStageSpecification_RenderStageCategory::OTHER;
}
}
static void
sync_timestamp(IntelRenderpassDataSource::TraceContext &ctx,
struct intel_ds_device *device)
{
uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
uint64_t gpu_ts = intel_device_info_timebase_scale(&device->info,
intel_read_gpu_timestamp(device->fd));
if (cpu_ts < device->next_clock_sync_ns)
return;
PERFETTO_LOG("sending clocks gpu=0x%08x", device->gpu_clock_id);
device->sync_gpu_ts = gpu_ts;
device->next_clock_sync_ns = cpu_ts + 1000000000ull;
auto packet = ctx.NewTracePacket();
packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
packet->set_timestamp(cpu_ts);
auto event = packet->set_clock_snapshot();
{
auto clock = event->add_clocks();
clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
clock->set_timestamp(cpu_ts);
}
{
auto clock = event->add_clocks();
clock->set_clock_id(device->gpu_clock_id);
clock->set_timestamp(gpu_ts);
}
}
static void
send_descriptors(IntelRenderpassDataSource::TraceContext &ctx,
struct intel_ds_device *device)
{
struct intel_ds_queue *queue;
PERFETTO_LOG("Sending renderstage descriptors");
device->event_id = 0;
u_vector_foreach(queue, &device->queues) {
for (uint32_t s = 0; s < ARRAY_SIZE(queue->stages); s++) {
queue->stages[s].start_ns = 0;
}
}
{
auto packet = ctx.NewTracePacket();
packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
packet->set_sequence_flags(perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
auto interned_data = packet->set_interned_data();
{
auto desc = interned_data->add_graphics_contexts();
desc->set_iid(device->iid);
desc->set_pid(getpid());
switch (device->api) {
case INTEL_DS_API_OPENGL:
desc->set_api(perfetto::protos::pbzero::InternedGraphicsContext_Api::OPEN_GL);
break;
case INTEL_DS_API_VULKAN:
desc->set_api(perfetto::protos::pbzero::InternedGraphicsContext_Api::VULKAN);
break;
default:
break;
}
}
/* Emit all the IID picked at device/queue creation. */
u_vector_foreach(queue, &device->queues) {
for (unsigned s = 0; s < INTEL_DS_QUEUE_STAGE_N_STAGES; s++) {
{
/* We put the stage number in there so that all rows are order
* by intel_ds_queue_stage.
*/
char name[100];
snprintf(name, sizeof(name), "%.10s-%s-%u-%s",
util_get_process_name(),
queue->name, s, intel_queue_stage_desc[s].name);
auto desc = interned_data->add_gpu_specifications();
desc->set_iid(queue->stages[s].queue_iid);
desc->set_name(name);
}
{
auto desc = interned_data->add_gpu_specifications();
desc->set_iid(queue->stages[s].stage_iid);
desc->set_name(intel_queue_stage_desc[s].name);
}
}
}
}
device->next_clock_sync_ns = 0;
sync_timestamp(ctx, device);
}
typedef void (*trace_payload_as_extra_func)(perfetto::protos::pbzero::GpuRenderStageEvent *, const void*);
static void
begin_event(struct intel_ds_queue *queue, uint64_t ts_ns,
enum intel_ds_queue_stage stage_id)
{
/* If we haven't managed to calibrate the alignment between GPU and CPU
* timestamps yet, then skip this trace, otherwise perfetto won't know
* what to do with it.
*/
if (!queue->device->sync_gpu_ts) {
queue->stages[stage_id].start_ns = 0;
return;
}
queue->stages[stage_id].start_ns = ts_ns;
}
static void
end_event(struct intel_ds_queue *queue, uint64_t ts_ns,
enum intel_ds_queue_stage stage_id,
uint32_t submission_id, const void* payload = nullptr,
trace_payload_as_extra_func payload_as_extra = nullptr)
{
struct intel_ds_device *device = queue->device;
/* If we haven't managed to calibrate the alignment between GPU and CPU
* timestamps yet, then skip this trace, otherwise perfetto won't know
* what to do with it.
*/
if (!device->sync_gpu_ts)
return;
struct intel_ds_stage *stage = &queue->stages[stage_id];
uint64_t start_ns = stage->start_ns;
if (!start_ns)
return;
uint64_t evt_id = device->event_id++;
IntelRenderpassDataSource::Trace([=](IntelRenderpassDataSource::TraceContext tctx) {
if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
send_descriptors(tctx, queue->device);
state->was_cleared = false;
}
sync_timestamp(tctx, queue->device);
auto packet = tctx.NewTracePacket();
packet->set_timestamp(start_ns);
packet->set_timestamp_clock_id(queue->device->gpu_clock_id);
assert(ts_ns >= start_ns);
auto event = packet->set_gpu_render_stage_event();
event->set_gpu_id(queue->device->gpu_id);
event->set_hw_queue_iid(stage->queue_iid);
event->set_stage_iid(stage->stage_iid);
event->set_context(queue->device->iid);
event->set_event_id(evt_id);
event->set_duration(ts_ns - start_ns);
event->set_submission_id(submission_id);
if (payload && payload_as_extra) {
payload_as_extra(event, payload);
}
});
stage->start_ns = 0;
}
static void
custom_trace_payload_as_extra_end_stall(perfetto::protos::pbzero::GpuRenderStageEvent *event,
const struct trace_intel_end_stall *payload)
{
char buf[256];
{
auto data = event->add_extra_data();
data->set_name("stall_reason");
snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s : %s",
(payload->flags & INTEL_DS_DEPTH_CACHE_FLUSH_BIT) ? "+depth_flush" : "",
(payload->flags & INTEL_DS_DATA_CACHE_FLUSH_BIT) ? "+dc_flush" : "",
(payload->flags & INTEL_DS_HDC_PIPELINE_FLUSH_BIT) ? "+hdc_flush" : "",
(payload->flags & INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT) ? "+rt_flush" : "",
(payload->flags & INTEL_DS_TILE_CACHE_FLUSH_BIT) ? "+tile_flush" : "",
(payload->flags & INTEL_DS_STATE_CACHE_INVALIDATE_BIT) ? "+state_inv" : "",
(payload->flags & INTEL_DS_CONST_CACHE_INVALIDATE_BIT) ? "+const_inv" : "",
(payload->flags & INTEL_DS_VF_CACHE_INVALIDATE_BIT) ? "+vf_inv" : "",
(payload->flags & INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT) ? "+tex_inv" : "",
(payload->flags & INTEL_DS_INST_CACHE_INVALIDATE_BIT) ? "+inst_inv" : "",
(payload->flags & INTEL_DS_STALL_AT_SCOREBOARD_BIT) ? "+pb_stall" : "",
(payload->flags & INTEL_DS_DEPTH_STALL_BIT) ? "+depth_stall" : "",
(payload->flags & INTEL_DS_HDC_PIPELINE_FLUSH_BIT) ? "+hdc_flush" : "",
(payload->flags & INTEL_DS_CS_STALL_BIT) ? "+cs_stall" : "",
(payload->flags & INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) ? "+udp_flush" : "",
payload->reason ? payload->reason : "unknown");
assert(strlen(buf) > 0);
data->set_value(buf);
}
}
#endif /* HAVE_PERFETTO */
#ifdef __cplusplus
extern "C" {
#endif
#ifdef HAVE_PERFETTO
/*
* Trace callbacks, called from u_trace once the timestamps from GPU have been
* collected.
*/
#define CREATE_DUAL_EVENT_CALLBACK(event_name, stage) \
void \
intel_ds_begin_##event_name(struct intel_ds_device *device, \
uint64_t ts_ns, \
const void *flush_data, \
const struct trace_intel_begin_##event_name *payload) \
{ \
const struct intel_ds_flush_data *flush = \
(const struct intel_ds_flush_data *) flush_data; \
begin_event(flush->queue, ts_ns, stage); \
} \
\
void \
intel_ds_end_##event_name(struct intel_ds_device *device, \
uint64_t ts_ns, \
const void *flush_data, \
const struct trace_intel_end_##event_name *payload) \
{ \
const struct intel_ds_flush_data *flush = \
(const struct intel_ds_flush_data *) flush_data; \
end_event(flush->queue, ts_ns, stage, flush->submission_id, \
payload, \
(trace_payload_as_extra_func) \
&trace_payload_as_extra_intel_end_##event_name); \
} \
CREATE_DUAL_EVENT_CALLBACK(batch, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(cmd_buffer, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(render_pass, INTEL_DS_QUEUE_STAGE_RENDER_PASS)
CREATE_DUAL_EVENT_CALLBACK(dyn_render_pass, INTEL_DS_QUEUE_STAGE_RENDER_PASS)
CREATE_DUAL_EVENT_CALLBACK(blorp, INTEL_DS_QUEUE_STAGE_BLORP)
CREATE_DUAL_EVENT_CALLBACK(draw, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indexed, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indexed_multi, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indexed_indirect, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_multi, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indirect, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indirect_byte_count, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indexed_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(compute, INTEL_DS_QUEUE_STAGE_COMPUTE)
void
intel_ds_begin_stall(struct intel_ds_device *device,
uint64_t ts_ns,
const void *flush_data,
const struct trace_intel_begin_stall *payload)
{
const struct intel_ds_flush_data *flush =
(const struct intel_ds_flush_data *) flush_data;
begin_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_STALL);
}
void
intel_ds_end_stall(struct intel_ds_device *device,
uint64_t ts_ns,
const void *flush_data,
const struct trace_intel_end_stall *payload)
{
const struct intel_ds_flush_data *flush =
(const struct intel_ds_flush_data *) flush_data;
end_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_STALL, flush->submission_id,
payload,
(trace_payload_as_extra_func)custom_trace_payload_as_extra_end_stall);
}
uint64_t
intel_ds_begin_submit(struct intel_ds_queue *queue)
{
return perfetto::base::GetBootTimeNs().count();
}
void
intel_ds_end_submit(struct intel_ds_queue *queue,
uint64_t start_ts)
{
if (!u_trace_context_actively_tracing(&queue->device->trace_context)) {
queue->device->sync_gpu_ts = 0;
queue->device->next_clock_sync_ns = 0;
return;
}
uint64_t end_ts = perfetto::base::GetBootTimeNs().count();
uint32_t submission_id = queue->submission_id++;
IntelRenderpassDataSource::Trace([=](IntelRenderpassDataSource::TraceContext tctx) {
if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
send_descriptors(tctx, queue->device);
state->was_cleared = false;
}
sync_timestamp(tctx, queue->device);
auto packet = tctx.NewTracePacket();
packet->set_timestamp(start_ts);
auto event = packet->set_vulkan_api_event();
auto submit = event->set_vk_queue_submit();
// submit->set_pid(os_get_pid());
// submit->set_tid(os_get_tid());
submit->set_duration_ns(end_ts - start_ts);
submit->set_vk_queue((uintptr_t) queue);
submit->set_submission_id(submission_id);
});
}
#endif /* HAVE_PERFETTO */
static void
intel_driver_ds_init_once(void)
{
#ifdef HAVE_PERFETTO
util_perfetto_init();
perfetto::DataSourceDescriptor dsd;
dsd.set_name("gpu.renderstages.intel");
IntelRenderpassDataSource::Register(dsd);
#endif
}
static once_flag intel_driver_ds_once_flag = ONCE_FLAG_INIT;
static uint64_t get_iid()
{
static uint64_t iid = 1;
return iid++;
}
void
intel_driver_ds_init(void)
{
call_once(&intel_driver_ds_once_flag,
intel_driver_ds_init_once);
}
void
intel_ds_device_init(struct intel_ds_device *device,
const struct intel_device_info *devinfo,
int drm_fd,
uint32_t gpu_id,
enum intel_ds_api api)
{
memset(device, 0, sizeof(*device));
assert(gpu_id < 128);
device->gpu_id = gpu_id;
device->gpu_clock_id = intel_pps_clock_id(gpu_id);
device->fd = drm_fd;
device->info = *devinfo;
device->iid = get_iid();
device->api = api;
u_vector_init(&device->queues, 4, sizeof(struct intel_ds_queue));
}
void
intel_ds_device_fini(struct intel_ds_device *device)
{
u_trace_context_fini(&device->trace_context);
u_vector_finish(&device->queues);
}
struct intel_ds_queue *
intel_ds_device_add_queue(struct intel_ds_device *device,
const char *fmt_name,
...)
{
struct intel_ds_queue *queue =
(struct intel_ds_queue *) u_vector_add(&device->queues);
va_list ap;
memset(queue, 0, sizeof(*queue));
queue->device = device;
queue->queue_id = u_vector_length(&device->queues) - 1;
va_start(ap, fmt_name);
vsnprintf(queue->name, sizeof(queue->name), fmt_name, ap);
va_end(ap);
for (unsigned s = 0; s < INTEL_DS_QUEUE_STAGE_N_STAGES; s++) {
queue->stages[s].queue_iid = get_iid();
queue->stages[s].stage_iid = get_iid();
}
return queue;
}
void intel_ds_flush_data_init(struct intel_ds_flush_data *data,
struct intel_ds_queue *queue,
uint64_t submission_id)
{
memset(data, 0, sizeof(*data));
data->queue = queue;
data->submission_id = submission_id;
u_trace_init(&data->trace, &queue->device->trace_context);
}
void intel_ds_flush_data_fini(struct intel_ds_flush_data *data)
{
u_trace_fini(&data->trace);
}
#ifdef __cplusplus
}
#endif