mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-24 16:50:22 +01:00
radeonsi: Add perfetto support in radeonsi
Add perfetto code in the new files si_perfetto.h/.cpp, which add the tracepoint begin and end events and call into the code generated by the Python script si_tracepoints.py. Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23664>
This commit is contained in:
parent
4752b188dc
commit
a164e147e9
5 changed files with 659 additions and 2 deletions
|
|
@ -18,6 +18,20 @@
|
|||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
si_tracepoints = custom_target(
|
||||
'si_tracepoints.[ch]',
|
||||
input: 'si_tracepoints.py',
|
||||
output: ['si_tracepoints.c', 'si_tracepoints_perfetto.h', 'si_tracepoints.h'],
|
||||
command: [
|
||||
prog_python, '@INPUT@',
|
||||
'-p', join_paths(dir_source_root, 'src/util/perf/'),
|
||||
'-C', '@OUTPUT0@',
|
||||
'--perfetto-hdr', '@OUTPUT1@',
|
||||
'-H', '@OUTPUT2@'
|
||||
],
|
||||
depend_files: u_trace_py,
|
||||
)
|
||||
|
||||
files_libradeonsi = files(
|
||||
'driinfo_radeonsi.h',
|
||||
'gfx10_shader_ngg.c',
|
||||
|
|
@ -101,9 +115,17 @@ files_libradeonsi = files(
|
|||
'radeon_video.h',
|
||||
)
|
||||
|
||||
files_libradeonsi += si_tracepoints
|
||||
|
||||
radeonsi_include_dirs = [inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_amd_common,
|
||||
inc_amd_common_llvm, inc_gallium_drivers]
|
||||
radeonsi_deps = [dep_llvm, dep_clock, dep_libdrm_radeon, idep_nir_headers, idep_amdgfxregs_h, idep_mesautil, idep_aco]
|
||||
inc_amd_common_llvm, inc_gallium_drivers, inc_compiler]
|
||||
radeonsi_deps = [dep_llvm, dep_clock, dep_libdrm_radeon, idep_nir_headers, idep_amdgfxregs_h, idep_mesautil, idep_aco, idep_u_tracepoints]
|
||||
|
||||
if with_perfetto
|
||||
radeonsi_deps += dep_perfetto
|
||||
endif
|
||||
|
||||
files_libradeonsi += ['si_perfetto.cpp', 'si_perfetto.h']
|
||||
|
||||
radeonsi_gfx_libs = []
|
||||
foreach ver : ['6', '7', '8', '9', '10', '103', '11']
|
||||
|
|
|
|||
394
src/gallium/drivers/radeonsi/si_perfetto.cpp
Normal file
394
src/gallium/drivers/radeonsi/si_perfetto.cpp
Normal file
|
|
@ -0,0 +1,394 @@
|
|||
/*
|
||||
* Copyright 2023 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "util/hash_table.h"
|
||||
#include "util/u_process.h"
|
||||
#include "util/hash_table.h"
|
||||
|
||||
#include "si_pipe.h"
|
||||
#include "si_perfetto.h"
|
||||
#include "si_tracepoints.h"
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
|
||||
#include "util/perf/u_perfetto.h"
|
||||
#include "util/perf/u_perfetto_renderpass.h"
|
||||
|
||||
#include "si_tracepoints_perfetto.h"
|
||||
|
||||
/* Just naming stages */
|
||||
static const struct {
|
||||
const char *name;
|
||||
|
||||
/* The perfetto UI requires that there is a parent-child relationship
|
||||
* within a row of elements. Which means that all children elements must
|
||||
* end within the lifespan of their parent.
|
||||
*
|
||||
* Some elements like stalls and command buffers follow that relationship,
|
||||
* but not all. This tells us in which UI row the elements should live.
|
||||
*/
|
||||
enum si_ds_queue_stage draw_stage;
|
||||
} si_queue_stage_desc[SI_DS_QUEUE_STAGE_N_STAGES] = {
|
||||
/* Order must match the enum! */
|
||||
{
|
||||
"queue",
|
||||
SI_DS_QUEUE_STAGE_QUEUE,
|
||||
},
|
||||
{
|
||||
"compute",
|
||||
SI_DS_QUEUE_STAGE_COMPUTE,
|
||||
},
|
||||
{
|
||||
"draw",
|
||||
SI_DS_QUEUE_STAGE_DRAW,
|
||||
}
|
||||
};
|
||||
|
||||
struct SIRenderpassIncrementalState {
|
||||
bool was_cleared = true;
|
||||
};
|
||||
|
||||
struct SIRenderpassTraits : public perfetto::DefaultDataSourceTraits {
|
||||
using IncrementalStateType = SIRenderpassIncrementalState;
|
||||
};
|
||||
|
||||
class SIRenderpassDataSource : public MesaRenderpassDataSource<SIRenderpassDataSource, SIRenderpassTraits> {
|
||||
};
|
||||
|
||||
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(SIRenderpassDataSource);
|
||||
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(SIRenderpassDataSource);
|
||||
|
||||
using perfetto::protos::pbzero::InternedGpuRenderStageSpecification_RenderStageCategory;
|
||||
|
||||
/* Emit a CPU/GPU clock correlation packet for `device`.
 *
 * The function is rate limited through device->next_clock_sync_ns: once a
 * sync has been emitted, the next one is scheduled one second later. Callers
 * that need an immediate sync reset next_clock_sync_ns to 0 beforehand.
 *
 * ctx:    trace context the clock sync is emitted into.
 * device: device whose sync_gpu_ts / next_clock_sync_ns are updated.
 */
static void sync_timestamp(SIRenderpassDataSource::TraceContext &ctx, struct si_ds_device *device)
{
   /* Check the rate limit first so that throttled calls do not pay for a
    * GPU timestamp query whose result would be thrown away.
    */
   if (static_cast<uint64_t>(perfetto::base::GetBootTimeNs().count()) < device->next_clock_sync_ns)
      return;

   struct si_context *sctx = container_of(device, struct si_context, ds);
   const uint64_t gpu_ts = sctx->screen->b.get_timestamp(&sctx->screen->b);

   /* Sample the CPU clock right after the GPU clock so that the two values
    * of the correlation pair are taken back to back.
    */
   const uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();

   PERFETTO_LOG("sending clocks gpu=0x%08x", device->gpu_clock_id);

   device->sync_gpu_ts = gpu_ts;
   device->next_clock_sync_ns = cpu_ts + 1000000000ull;
   MesaRenderpassDataSource<SIRenderpassDataSource, SIRenderpassTraits>::EmitClockSync(ctx, cpu_ts, gpu_ts, device->gpu_clock_id);
}
|
||||
|
||||
/* Emit the interned descriptors (graphics context, hardware queues and
 * stages) of `device`, then force a clock sync.
 *
 * The event id counter and the first-level start timestamp of every stage
 * are reset before the descriptors are written.
 */
static void send_descriptors(SIRenderpassDataSource::TraceContext &ctx, struct si_ds_device *device)
{
   PERFETTO_LOG("Sending renderstage descriptors");

   device->event_id = 0;
   list_for_each_entry_safe(struct si_ds_queue, q, &device->queues, link) {
      for (uint32_t i = 0; i < ARRAY_SIZE(q->stages); i++)
         q->stages[i].start_ns[0] = 0;
   }

   {
      auto packet = ctx.NewTracePacket();

      const uint64_t now_ns = perfetto::base::GetBootTimeNs().count();
      packet->set_timestamp(now_ns);
      packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
      packet->set_sequence_flags(perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);

      auto interned_data = packet->set_interned_data();

      {
         auto gfx_ctx = interned_data->add_graphics_contexts();
         gfx_ctx->set_iid(device->iid);
         gfx_ctx->set_pid(getpid());

         /* Any other API value leaves the field unset. */
         if (device->api == AMD_DS_API_OPENGL)
            gfx_ctx->set_api(perfetto::protos::pbzero::InternedGraphicsContext_Api::OPEN_GL);
         else if (device->api == AMD_DS_API_VULKAN)
            gfx_ctx->set_api(perfetto::protos::pbzero::InternedGraphicsContext_Api::VULKAN);
      }

      /* Emit all the IIDs picked at device/queue creation. */
      list_for_each_entry_safe(struct si_ds_queue, q, &device->queues, link) {
         for (unsigned stage_idx = 0; stage_idx < SI_DS_QUEUE_STAGE_N_STAGES; stage_idx++) {
            const char *stage_name = si_queue_stage_desc[stage_idx].name;

            {
               /* The stage number is part of the name so that all rows are
                * ordered by si_ds_queue_stage.
                */
               char row_name[100];
               snprintf(row_name, sizeof(row_name), "%.10s-%s-%u-%s", util_get_process_name(), q->name, stage_idx, stage_name);

               auto queue_spec = interned_data->add_gpu_specifications();
               queue_spec->set_iid(q->stages[stage_idx].queue_iid);
               queue_spec->set_name(row_name);
            }
            {
               auto stage_spec = interned_data->add_gpu_specifications();
               stage_spec->set_iid(q->stages[stage_idx].stage_iid);
               stage_spec->set_name(stage_name);
            }
         }
      }
   }

   /* Clearing the deadline makes the sync below unconditional. */
   device->next_clock_sync_ns = 0;
   sync_timestamp(ctx, device);
}
|
||||
|
||||
typedef void (*trace_payload_as_extra_func)(perfetto::protos::pbzero::GpuRenderStageEvent *, const void*);
|
||||
|
||||
static void begin_event(struct si_ds_queue *queue, uint64_t ts_ns, enum si_ds_queue_stage stage_id)
|
||||
{
|
||||
PERFETTO_LOG("begin event called - ts_ns=%lu", ts_ns);
|
||||
uint32_t level = queue->stages[stage_id].level;
|
||||
/* If we haven't managed to calibrate the alignment between GPU and CPU
|
||||
* timestamps yet, then skip this trace, otherwise perfetto won't know
|
||||
* what to do with it.
|
||||
*/
|
||||
if (!queue->device->sync_gpu_ts) {
|
||||
queue->stages[stage_id].start_ns[level] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (level >= (ARRAY_SIZE(queue->stages[stage_id].start_ns) - 1))
|
||||
return;
|
||||
|
||||
queue->stages[stage_id].start_ns[level] = ts_ns;
|
||||
queue->stages[stage_id].level++;
|
||||
}
|
||||
|
||||
static void end_event(struct si_ds_queue *queue, uint64_t ts_ns, enum si_ds_queue_stage stage_id, uint32_t submission_id, const char *app_event, const void* payload = nullptr, trace_payload_as_extra_func payload_as_extra = nullptr)
|
||||
{
|
||||
PERFETTO_LOG("end event called - ts_ns=%lu", ts_ns);
|
||||
struct si_ds_device *device = queue->device;
|
||||
|
||||
/* If we haven't managed to calibrate the alignment between GPU and CPU
|
||||
* timestamps yet, then skip this trace, otherwise perfetto won't know
|
||||
* what to do with it.
|
||||
*/
|
||||
if (!device->sync_gpu_ts)
|
||||
return;
|
||||
|
||||
if (queue->stages[stage_id].level == 0)
|
||||
return;
|
||||
|
||||
uint32_t level = --queue->stages[stage_id].level;
|
||||
struct si_ds_stage *stage = &queue->stages[stage_id];
|
||||
uint64_t start_ns = stage->start_ns[level];
|
||||
PERFETTO_LOG("end event called - start_ns=%lu ts_ns=%lu", start_ns, ts_ns);
|
||||
if (!start_ns || start_ns > ts_ns)
|
||||
return;
|
||||
|
||||
SIRenderpassDataSource::Trace([=](SIRenderpassDataSource::TraceContext tctx) {
|
||||
if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
|
||||
send_descriptors(tctx, queue->device);
|
||||
state->was_cleared = false;
|
||||
}
|
||||
|
||||
sync_timestamp(tctx, queue->device);
|
||||
|
||||
uint64_t evt_id = device->event_id++;
|
||||
|
||||
/* If this is an application event, we might need to generate a new
|
||||
* stage_iid if not already seen. Otherwise, it's a driver event and we
|
||||
* have use the internal stage_iid.
|
||||
*/
|
||||
uint64_t stage_iid = app_event ? tctx.GetDataSourceLocked()->debug_marker_stage(tctx, app_event) : stage->stage_iid;
|
||||
|
||||
auto packet = tctx.NewTracePacket();
|
||||
|
||||
packet->set_timestamp(start_ns);
|
||||
packet->set_timestamp_clock_id(queue->device->gpu_clock_id);
|
||||
|
||||
assert(ts_ns >= start_ns);
|
||||
|
||||
auto event = packet->set_gpu_render_stage_event();
|
||||
event->set_gpu_id(queue->device->gpu_id);
|
||||
|
||||
event->set_hw_queue_iid(stage->queue_iid);
|
||||
event->set_stage_iid(stage_iid);
|
||||
event->set_context(queue->device->iid);
|
||||
event->set_event_id(evt_id);
|
||||
event->set_duration(ts_ns - start_ns);
|
||||
event->set_submission_id(submission_id);
|
||||
|
||||
if (payload && payload_as_extra) {
|
||||
payload_as_extra(event, payload);
|
||||
}
|
||||
});
|
||||
|
||||
stage->start_ns[level] = 0;
|
||||
}
|
||||
|
||||
#endif /* HAVE_PERFETTO */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
|
||||
/*
|
||||
* Trace callbacks, called from u_trace once the timestamps from GPU have been
|
||||
* collected.
|
||||
*/
|
||||
|
||||
#define CREATE_DUAL_EVENT_CALLBACK(event_name, stage) \
|
||||
void si_ds_begin_##event_name(struct si_ds_device *device, uint64_t ts_ns, uint16_t tp_idx, \
|
||||
const void *flush_data, \
|
||||
const struct trace_si_begin_##event_name *payload) \
|
||||
{ \
|
||||
const struct si_ds_flush_data *flush = (const struct si_ds_flush_data *) flush_data; \
|
||||
begin_event(flush->queue, ts_ns, stage); \
|
||||
} \
|
||||
\
|
||||
void si_ds_end_##event_name(struct si_ds_device *device, uint64_t ts_ns, uint16_t tp_idx, \
|
||||
const void *flush_data, \
|
||||
const struct trace_si_end_##event_name *payload) \
|
||||
{ \
|
||||
const struct si_ds_flush_data *flush = (const struct si_ds_flush_data *) flush_data; \
|
||||
end_event(flush->queue, ts_ns, stage, flush->submission_id, NULL, payload, \
|
||||
(trace_payload_as_extra_func)&trace_payload_as_extra_si_end_##event_name); \
|
||||
} \
|
||||
|
||||
CREATE_DUAL_EVENT_CALLBACK(draw, SI_DS_QUEUE_STAGE_DRAW)
|
||||
CREATE_DUAL_EVENT_CALLBACK(compute, SI_DS_QUEUE_STAGE_COMPUTE)
|
||||
|
||||
/* Return the current boot-time clock value in nanoseconds; `queue` is unused. */
uint64_t si_ds_begin_submit(struct si_ds_queue *queue)
{
   const uint64_t now_ns = perfetto::base::GetBootTimeNs().count();
   return now_ns;
}
|
||||
|
||||
void si_ds_end_submit(struct si_ds_queue *queue, uint64_t start_ts)
|
||||
{
|
||||
if (!u_trace_should_process(&queue->device->trace_context)) {
|
||||
queue->device->sync_gpu_ts = 0;
|
||||
queue->device->next_clock_sync_ns = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t end_ts = perfetto::base::GetBootTimeNs().count();
|
||||
uint32_t submission_id = queue->submission_id++;
|
||||
|
||||
SIRenderpassDataSource::Trace([=](SIRenderpassDataSource::TraceContext tctx) {
|
||||
if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
|
||||
send_descriptors(tctx, queue->device);
|
||||
state->was_cleared = false;
|
||||
}
|
||||
|
||||
sync_timestamp(tctx, queue->device);
|
||||
|
||||
auto packet = tctx.NewTracePacket();
|
||||
|
||||
packet->set_timestamp(start_ts);
|
||||
|
||||
auto event = packet->set_vulkan_api_event();
|
||||
auto submit = event->set_vk_queue_submit();
|
||||
|
||||
submit->set_duration_ns(end_ts - start_ts);
|
||||
submit->set_vk_queue((uintptr_t) queue);
|
||||
submit->set_submission_id(submission_id);
|
||||
});
|
||||
}
|
||||
|
||||
#endif /* HAVE_PERFETTO */
|
||||
|
||||
/* One-time setup: with perfetto support compiled in, initialise perfetto and
 * register the radeonsi render-stage data source; otherwise do nothing.
 */
static void si_driver_ds_init_once(void)
{
#ifdef HAVE_PERFETTO
   util_perfetto_init();

   perfetto::DataSourceDescriptor descriptor;
   descriptor.set_name("gpu.renderstages.amd");
   SIRenderpassDataSource::Register(descriptor);
#endif
}
|
||||
|
||||
static once_flag si_driver_ds_once_flag = ONCE_FLAG_INIT;
|
||||
/* Next interning id to hand out; ids start at 1. */
static uint64_t iid = 1;

/* Return a fresh interning id and advance the counter. */
static uint64_t get_iid()
{
   const uint64_t fresh = iid;
   iid = fresh + 1;
   return fresh;
}
|
||||
|
||||
static uint32_t si_pps_clock_id(uint32_t gpu_id)
|
||||
{
|
||||
char buf[40];
|
||||
snprintf(buf, sizeof(buf), "org.freedesktop.mesa.amd.gpu%u", gpu_id);
|
||||
|
||||
return _mesa_hash_string(buf) | 0x80000000;
|
||||
}
|
||||
|
||||
void si_driver_ds_init(void)
|
||||
{
|
||||
call_once(&si_driver_ds_once_flag, si_driver_ds_init_once);
|
||||
si_gpu_tracepoint_config_variable();
|
||||
}
|
||||
|
||||
void si_ds_device_init(struct si_ds_device *device, const struct radeon_info *devinfo, uint32_t gpu_id, enum amd_ds_api api)
|
||||
{
|
||||
device->gpu_id = gpu_id;
|
||||
device->gpu_clock_id = si_pps_clock_id(gpu_id);
|
||||
device->info = devinfo;
|
||||
device->iid = get_iid();
|
||||
device->api = api;
|
||||
list_inithead(&device->queues);
|
||||
}
|
||||
|
||||
void si_ds_device_fini(struct si_ds_device *device)
|
||||
{
|
||||
u_trace_context_fini(&device->trace_context);
|
||||
}
|
||||
|
||||
struct si_ds_queue * si_ds_device_init_queue(struct si_ds_device *device, struct si_ds_queue *queue, const char *fmt_name, ...)
|
||||
{
|
||||
va_list ap;
|
||||
queue->device = device;
|
||||
|
||||
va_start(ap, fmt_name);
|
||||
vsnprintf(queue->name, sizeof(queue->name), fmt_name, ap);
|
||||
va_end(ap);
|
||||
|
||||
for (unsigned s = 0; s < SI_DS_QUEUE_STAGE_N_STAGES; s++) {
|
||||
queue->stages[s].queue_iid = get_iid();
|
||||
queue->stages[s].stage_iid = get_iid();
|
||||
}
|
||||
|
||||
list_add(&queue->link, &device->queues);
|
||||
|
||||
return queue;
|
||||
}
|
||||
|
||||
void si_ds_flush_data_init(struct si_ds_flush_data *data, struct si_ds_queue *queue, uint64_t submission_id)
|
||||
{
|
||||
memset(data, 0, sizeof(*data));
|
||||
|
||||
data->queue = queue;
|
||||
data->submission_id = submission_id;
|
||||
|
||||
u_trace_init(&data->trace, &queue->device->trace_context);
|
||||
}
|
||||
|
||||
void si_ds_flush_data_fini(struct si_ds_flush_data *data)
|
||||
{
|
||||
u_trace_fini(&data->trace);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
159
src/gallium/drivers/radeonsi/si_perfetto.h
Normal file
159
src/gallium/drivers/radeonsi/si_perfetto.h
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
/*
|
||||
* Copyright 2023 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef SI_PERFETTO_H
|
||||
#define SI_PERFETTO_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "util/macros.h"
|
||||
#include "util/perf/u_trace.h"
|
||||
#include "util/u_vector.h"
|
||||
|
||||
#include "amd/common/ac_gpu_info.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Perfetto collects TracePackets from the application and/or drivers. It is the root object of a
|
||||
* Perfetto trace. A Perfetto trace is a linear sequence of TracePackets.
|
||||
* TracePackets contains timestamp and timestamp_clock_id along with lots of other data
|
||||
* like gpu_counter_event and gpu_render_stage_event.
|
||||
* gpu_render_stage_event contains data such as event_id, duration, gpu_id, stage_iid, context etc.
|
||||
* So a render stage can be named as "draw" which will collect start timestamp and end timestamp
|
||||
* along with other payload data of each draw call from OpenGL
|
||||
*/
|
||||
|
||||
enum amd_ds_api {
|
||||
AMD_DS_API_OPENGL,
|
||||
AMD_DS_API_VULKAN,
|
||||
};
|
||||
|
||||
enum si_ds_queue_stage {
|
||||
SI_DS_QUEUE_STAGE_QUEUE,
|
||||
SI_DS_QUEUE_STAGE_COMPUTE,
|
||||
SI_DS_QUEUE_STAGE_DRAW,
|
||||
SI_DS_QUEUE_STAGE_N_STAGES,
|
||||
};
|
||||
|
||||
struct si_ds_device {
|
||||
const struct radeon_info *info;
|
||||
|
||||
/* API of this device */
|
||||
enum amd_ds_api api;
|
||||
|
||||
/* GPU identifier domain:bus:device:func:pci_id */
|
||||
uint32_t gpu_id;
|
||||
|
||||
/* Clock identifier for this device. */
|
||||
uint32_t gpu_clock_id;
|
||||
|
||||
/* The timestamp at the point where we first emitted the clock_sync..
|
||||
* this will be a *later* timestamp than the first GPU traces (since
|
||||
* we capture the first clock_sync from the CPU *after* the first GPU
|
||||
* tracepoints happen). To avoid confusing perfetto we need to drop
|
||||
* the GPU traces with timestamps before this.
|
||||
*/
|
||||
uint64_t sync_gpu_ts;
|
||||
|
||||
/* Next timestamp after which we should resend a clock correlation. */
|
||||
uint64_t next_clock_sync_ns;
|
||||
|
||||
/* Unique perfetto identifier for the context */
|
||||
uint64_t iid;
|
||||
|
||||
/* Event ID generator (manipulate only inside
|
||||
* SIRenderpassDataSource::Trace)
|
||||
*/
|
||||
uint64_t event_id;
|
||||
|
||||
struct u_trace_context trace_context;
|
||||
|
||||
/* List of si_ds_queue */
|
||||
struct list_head queues;
|
||||
};
|
||||
|
||||
struct si_ds_stage {
|
||||
/* Unique hw_queue IID */
|
||||
uint64_t queue_iid;
|
||||
|
||||
/* Unique stage IID */
|
||||
uint64_t stage_iid;
|
||||
|
||||
/* Start timestamp of the last work element. We have an array indexed by
|
||||
* level so that we can track multi levels of events (like
|
||||
* primary/secondary command buffers).
|
||||
*/
|
||||
uint64_t start_ns[5];
|
||||
|
||||
/* Current number of valid elements in start_ns */
|
||||
uint32_t level;
|
||||
};
|
||||
|
||||
struct si_ds_queue {
|
||||
struct list_head link;
|
||||
|
||||
/* Device this queue belongs to */
|
||||
struct si_ds_device *device;
|
||||
|
||||
/* Unique name of the queue */
|
||||
char name[80];
|
||||
|
||||
/* Counter incremented on each si_ds_end_submit() call */
|
||||
uint64_t submission_id;
|
||||
|
||||
struct si_ds_stage stages[SI_DS_QUEUE_STAGE_N_STAGES];
|
||||
};
|
||||
|
||||
struct si_ds_flush_data {
|
||||
struct si_ds_queue *queue;
|
||||
|
||||
/* u_trace element in which we copy other traces in case we deal with
|
||||
* reusable command buffers.
|
||||
*/
|
||||
struct u_trace trace;
|
||||
|
||||
/* Unique submission ID associated with the trace */
|
||||
uint64_t submission_id;
|
||||
};
|
||||
|
||||
void si_driver_ds_init(void);
|
||||
|
||||
void si_ds_device_init(struct si_ds_device *device, const struct radeon_info *devinfo,
|
||||
uint32_t gpu_id, enum amd_ds_api api);
|
||||
void si_ds_device_fini(struct si_ds_device *device);
|
||||
|
||||
struct si_ds_queue *si_ds_device_init_queue(struct si_ds_device *device, struct si_ds_queue *queue,
|
||||
const char *fmt_name, ...);
|
||||
|
||||
void si_ds_flush_data_init(struct si_ds_flush_data *data, struct si_ds_queue *queue,
|
||||
uint64_t submission_id);
|
||||
|
||||
void si_ds_flush_data_fini(struct si_ds_flush_data *data);
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
uint64_t si_ds_begin_submit(struct si_ds_queue *queue);
|
||||
void si_ds_end_submit(struct si_ds_queue *queue,
|
||||
uint64_t start_ts);
|
||||
|
||||
#else
|
||||
static inline uint64_t si_ds_begin_submit(struct si_ds_queue *queue)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void si_ds_end_submit(struct si_ds_queue *queue, uint64_t start_ts)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* HAVE_PERFETTO */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* SI_PERFETTO_H */
|
||||
|
|
@ -17,6 +17,7 @@
|
|||
#include "util/u_vertex_state_cache.h"
|
||||
#include "ac_sqtt.h"
|
||||
#include "ac_spm.h"
|
||||
#include "si_perfetto.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
@ -1361,6 +1362,8 @@ struct si_context {
|
|||
/* TODO: move other shaders here too */
|
||||
/* Only used for DCC MSAA clears with 4-8 fragments and 4-16 samples. */
|
||||
void *cs_clear_dcc_msaa[32][5][2][3][2]; /* [swizzle_mode][log2(bpe)][fragments == 8][log2(samples)-2][is_array] */
|
||||
|
||||
struct si_ds_device ds;
|
||||
};
|
||||
|
||||
/* si_blit.c */
|
||||
|
|
|
|||
79
src/gallium/drivers/radeonsi/si_tracepoints.py
Normal file
79
src/gallium/drivers/radeonsi/si_tracepoints.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
#
|
||||
# Copyright 2023 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
# List of the default tracepoints enabled. By default most tracepoints are
|
||||
# enabled; pass tp_default_enabled=False to begin_end_tp() to disable one by default.
|
||||
#
|
||||
si_default_tps = []
|
||||
|
||||
#
|
||||
# Tracepoint definitions:
|
||||
#
|
||||
def define_tracepoints(args):
    """Declare the radeonsi tracepoints with the u_trace generator.

    Registers the 'si_perfetto.h' header and one begin/end tracepoint pair
    each for 'draw' and 'compute'. `args` is accepted for symmetry with
    generate_code() but is not read here.
    """
    from u_trace import Header, HeaderScope
    from u_trace import Tracepoint
    from u_trace import TracepointArg as Arg

    Header('si_perfetto.h', scope=HeaderScope.HEADER)

    def begin_end_tp(name, tp_args=None, tp_struct=None, tp_print=None,
                     tp_default_enabled=True, end_pipelined=True,
                     need_cs_param=False):
        """Declare the si_begin_<name> / si_end_<name> tracepoint pair.

        Only the end tracepoint carries the arguments, struct and print
        format. When tp_default_enabled is true, `name` is added to
        si_default_tps.
        """
        # A None default avoids sharing one list object between calls.
        if tp_args is None:
            tp_args = []

        if tp_default_enabled:
            si_default_tps.append(name)

        Tracepoint('si_begin_{0}'.format(name),
                   toggle_name=name,
                   tp_perfetto='si_ds_begin_{0}'.format(name),
                   need_cs_param=need_cs_param)
        Tracepoint('si_end_{0}'.format(name),
                   toggle_name=name,
                   args=tp_args,
                   tp_struct=tp_struct,
                   tp_perfetto='si_ds_end_{0}'.format(name),
                   tp_print=tp_print,
                   end_of_pipe=end_pipelined,
                   need_cs_param=need_cs_param)

    # Various draws/dispatch, radeonsi
    begin_end_tp('draw',
                 tp_args=[Arg(type='uint32_t', var='count', c_format='%u')])

    begin_end_tp('compute',
                 tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'),
                          Arg(type='uint32_t', var='group_y', c_format='%u'),
                          Arg(type='uint32_t', var='group_z', c_format='%u'),],
                 tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z'])
|
||||
|
||||
def generate_code(args):
    """Write the generated tracepoint sources.

    Emits the C source and header to args.src / args.hdr, then the perfetto
    helper header to args.perfetto_hdr.
    """
    import u_trace

    u_trace.utrace_generate(
        cpath=args.src,
        hpath=args.hdr,
        ctx_param='struct si_ds_device *dev',
        trace_toggle_name='si_gpu_tracepoint',
        trace_toggle_defaults=si_default_tps,
    )
    u_trace.utrace_generate_perfetto_utils(hpath=args.perfetto_hdr)
|
||||
|
||||
def main():
    """Parse the command line, then define the tracepoints and generate code.

    All four options are mandatory. The import path is put at the front of
    sys.path so that the u_trace module can be imported.
    """
    parser = argparse.ArgumentParser()
    option_flags = (
        ('-p', '--import-path'),
        ('-C', '--src'),
        ('-H', '--hdr'),
        ('--perfetto-hdr',),
    )
    for flags in option_flags:
        parser.add_argument(*flags, required=True)

    options = parser.parse_args()
    sys.path.insert(0, options.import_path)

    define_tracepoints(options)
    generate_code(options)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Loading…
Add table
Reference in a new issue