anv: instrument resource barriers instruction in u_trace

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38707>
This commit is contained in:
Lionel Landwerlin 2024-10-24 10:48:27 -07:00 committed by Marge Bot
parent 3520abf8a3
commit 15174b185b
4 changed files with 263 additions and 28 deletions

View file

@ -307,42 +307,112 @@ end_event(struct intel_ds_queue *queue, uint64_t ts_ns,
stage->start_ns[level] = 0;
}
/**
 * Format "<type>: <signal stages>-><wait stages>: " into buf.
 *
 * The barrier type is printed as "imm", "signal", "wait" or "unknown", and
 * each stage set in signal_stages / wait_stages is printed as a "+name"
 * token.
 *
 * Returns the number of characters actually stored in buf, not counting the
 * terminating NUL. Unlike raw snprintf(), the result never exceeds
 * buf_size - 1, so it can safely be used to advance a write offset even when
 * the output had to be truncated.
 */
static size_t
snprintf_stages(char *buf, size_t buf_size,
                enum intel_ds_barrier_type type,
                enum intel_ds_stages signal_stages,
                enum intel_ds_stages wait_stages)
{
   const int len =
      snprintf(buf, buf_size, "%s: %s%s%s%s%s%s%s->%s%s%s%s%s%s%s: ",
               type == INTEL_DS_BARRIER_TYPE_IMMEDIATE ? "imm" :
               type == INTEL_DS_BARRIER_TYPE_SIGNAL ? "signal" :
               type == INTEL_DS_BARRIER_TYPE_WAIT ? "wait" : "unknown",
               (signal_stages & INTEL_DS_STAGES_TOP_BIT) ? "+top" : "",
               (signal_stages & INTEL_DS_STAGES_GEOM_BIT) ? "+geom" : "",
               (signal_stages & INTEL_DS_STAGES_RASTER_BIT) ? "+rast" : "",
               (signal_stages & INTEL_DS_STAGES_DEPTH_BIT) ? "+ds" : "",
               (signal_stages & INTEL_DS_STAGES_PIXEL_BIT) ? "+pix" : "",
               (signal_stages & INTEL_DS_STAGES_COLOR_BIT) ? "+col" : "",
               (signal_stages & INTEL_DS_STAGES_GPGPU_BIT) ? "+cs" : "",
               (wait_stages & INTEL_DS_STAGES_TOP_BIT) ? "+top" : "",
               (wait_stages & INTEL_DS_STAGES_GEOM_BIT) ? "+geom" : "",
               (wait_stages & INTEL_DS_STAGES_RASTER_BIT) ? "+rast" : "",
               (wait_stages & INTEL_DS_STAGES_DEPTH_BIT) ? "+ds" : "",
               (wait_stages & INTEL_DS_STAGES_PIXEL_BIT) ? "+pix" : "",
               (wait_stages & INTEL_DS_STAGES_COLOR_BIT) ? "+col" : "",
               (wait_stages & INTEL_DS_STAGES_GPGPU_BIT) ? "+cs" : "");

   /* snprintf() reports the length the complete string would have had (or a
    * negative value on error). Clamp that to what actually fits so callers
    * accumulating offsets never step past the end of their buffer.
    */
   if (len <= 0 || buf_size == 0)
      return 0;

   return (size_t)len < buf_size ? (size_t)len : buf_size - 1;
}
/**
 * Format the stall/flush flags set in bits as a run of "+name" tokens.
 *
 * Returns the number of characters actually stored in buf, not counting the
 * terminating NUL. Unlike raw snprintf(), the result never exceeds
 * buf_size - 1, so it can safely be used to advance a write offset even when
 * the output had to be truncated.
 */
static size_t
snprintf_flags(char *buf, size_t buf_size, enum intel_ds_stall_flag bits)
{
   const int len =
      snprintf(buf, buf_size, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
               (bits & INTEL_DS_DEPTH_CACHE_FLUSH_BIT) ? "+depth_flush" : "",
               (bits & INTEL_DS_DATA_CACHE_FLUSH_BIT) ? "+dc_flush" : "",
               (bits & INTEL_DS_HDC_PIPELINE_FLUSH_BIT) ? "+hdc_flush" : "",
               (bits & INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT) ? "+rt_flush" : "",
               (bits & INTEL_DS_TILE_CACHE_FLUSH_BIT) ? "+tile_flush" : "",
               (bits & INTEL_DS_L3_FABRIC_FLUSH_BIT) ? "+l3_fabric_flush" : "",
               (bits & INTEL_DS_STATE_CACHE_INVALIDATE_BIT) ? "+state_inv" : "",
               (bits & INTEL_DS_CONST_CACHE_INVALIDATE_BIT) ? "+const_inv" : "",
               (bits & INTEL_DS_VF_CACHE_INVALIDATE_BIT) ? "+vf_inv" : "",
               (bits & INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT) ? "+tex_inv" : "",
               (bits & INTEL_DS_INST_CACHE_INVALIDATE_BIT) ? "+inst_inv" : "",
               (bits & INTEL_DS_STALL_AT_SCOREBOARD_BIT) ? "+pb_stall" : "",
               (bits & INTEL_DS_DEPTH_STALL_BIT) ? "+depth_stall" : "",
               (bits & INTEL_DS_CS_STALL_BIT) ? "+cs_stall" : "",
               (bits & INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) ? "+udp_flush" : "",
               (bits & INTEL_DS_END_OF_PIPE_BIT) ? "+eop" : "",
               (bits & INTEL_DS_CCS_CACHE_FLUSH_BIT) ? "+ccs_flush" : "");

   /* snprintf() reports the length the complete string would have had (or a
    * negative value on error). Clamp that to what actually fits so callers
    * accumulating offsets never step past the end of their buffer.
    */
   if (len <= 0 || buf_size == 0)
      return 0;

   return (size_t)len < buf_size ? (size_t)len : buf_size - 1;
}
/**
 * Format ": <r1>; <r2>; <r3>; <r4>" into buf.
 *
 * r1 is printed as "unknown" when NULL; r2..r4 (and their "; " separators)
 * are omitted when NULL.
 *
 * Returns the number of characters actually stored in buf, not counting the
 * terminating NUL. Unlike raw snprintf(), the result never exceeds
 * buf_size - 1, so it can safely be used to advance a write offset even when
 * the output had to be truncated.
 */
static size_t
snprintf_reasons(char *buf, size_t buf_size,
                 const char *r1, const char *r2,
                 const char *r3, const char *r4)
{
   const int len =
      snprintf(buf, buf_size, ": %s%s%s%s%s%s%s",
               r1 ? r1 : "unknown",
               r2 ? "; " : "", r2 ? r2 : "",
               r3 ? "; " : "", r3 ? r3 : "",
               r4 ? "; " : "", r4 ? r4 : "");

   /* snprintf() reports the length the complete string would have had (or a
    * negative value on error). Clamp that to what actually fits so callers
    * accumulating offsets never step past the end of their buffer.
    */
   if (len <= 0 || buf_size == 0)
      return 0;

   return (size_t)len < buf_size ? (size_t)len : buf_size - 1;
}
/**
 * Attach a human readable "reason" extra-data entry to a stall render stage
 * event.
 *
 * The value is the list of flush/invalidate/stall flags from the payload
 * followed by the (up to four) reason strings recorded with the stall.
 */
static void
custom_trace_payload_as_extra_end_stall(perfetto::protos::pbzero::GpuRenderStageEvent *event,
                                        const struct trace_intel_end_stall *payload)
{
   char buf[256];
   size_t buf_size = 0;

   {
      auto data = event->add_extra_data();
      data->set_name("reason");

      buf_size += snprintf_flags(buf + buf_size, sizeof(buf) - buf_size,
                                 (enum intel_ds_stall_flag) payload->flags);

      /* Never let the write offset run past the buffer: if the flags were
       * truncated, the remaining space is just the terminating NUL.
       */
      if (buf_size >= sizeof(buf))
         buf_size = sizeof(buf) - 1;

      snprintf_reasons(buf + buf_size, sizeof(buf) - buf_size,
                       payload->reason1, payload->reason2,
                       payload->reason3, payload->reason4);

      assert(strlen(buf) > 0);

      data->set_value(buf);
   }
}
static void
custom_trace_payload_as_extra_end_barrier(perfetto::protos::pbzero::GpuRenderStageEvent *event,
const struct trace_intel_end_barrier *payload)
{
char buf[256];
size_t buf_size = 0;
{
auto data = event->add_extra_data();
data->set_name("reason");
buf_size += snprintf_stages(buf + buf_size, sizeof(buf) - buf_size,
(enum intel_ds_barrier_type) payload->type,
(enum intel_ds_stages) payload->signal_stages,
(enum intel_ds_stages) payload->wait_stages);
buf_size += snprintf_flags(buf + buf_size, sizeof(buf) - buf_size,
(enum intel_ds_stall_flag) payload->flags);
buf_size += snprintf_reasons(buf + buf_size, sizeof(buf) - buf_size,
payload->reason1, payload->reason2,
payload->reason3, payload->reason4);
assert(strlen(buf) > 0);
data->set_value(buf);
@ -514,6 +584,34 @@ intel_ds_end_stall(struct intel_ds_device *device,
(trace_payload_as_extra_func)custom_trace_payload_as_extra_end_stall);
}
/**
 * Handler for the start of a barrier tracepoint.
 *
 * Marks the beginning of an INTEL_DS_QUEUE_STAGE_STALL event, at timestamp
 * ts_ns, on the queue referenced by flush_data (a struct
 * intel_ds_flush_data). The remaining parameters are not used here.
 */
void
intel_ds_begin_barrier(struct intel_ds_device *device,
                       uint64_t ts_ns,
                       uint16_t tp_idx,
                       const void *flush_data,
                       const struct trace_intel_begin_barrier *payload,
                       const void *indirect_data)
{
   const struct intel_ds_flush_data *ds_flush =
      (const struct intel_ds_flush_data *) flush_data;

   begin_event(ds_flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_STALL);
}
/**
 * Handler for the end of a barrier tracepoint.
 *
 * Closes the INTEL_DS_QUEUE_STAGE_STALL event on the queue referenced by
 * flush_data (a struct intel_ds_flush_data), forwarding the submission id,
 * tracepoint index, payload and indirect data. The payload is rendered into
 * the event's extra data by custom_trace_payload_as_extra_end_barrier().
 */
void
intel_ds_end_barrier(struct intel_ds_device *device,
                     uint64_t ts_ns,
                     uint16_t tp_idx,
                     const void *flush_data,
                     const struct trace_intel_end_barrier *payload,
                     const void *indirect_data)
{
   const struct intel_ds_flush_data *ds_flush =
      (const struct intel_ds_flush_data *) flush_data;

   end_event(ds_flush->queue,
             ts_ns,
             INTEL_DS_QUEUE_STAGE_STALL,
             ds_flush->submission_id,
             tp_idx,
             NULL,
             payload,
             indirect_data,
             (trace_payload_as_extra_func)custom_trace_payload_as_extra_end_barrier);
}
uint64_t
intel_ds_begin_submit(struct intel_ds_queue *queue)
{

View file

@ -65,6 +65,22 @@ enum intel_ds_stall_flag {
INTEL_DS_L3_FABRIC_FLUSH_BIT = BITFIELD_BIT(17),
};
/* Kind of barrier reported through the barrier tracepoint. */
enum intel_ds_barrier_type {
   INTEL_DS_BARRIER_TYPE_IMMEDIATE = 0,
   INTEL_DS_BARRIER_TYPE_SIGNAL    = 1,
   INTEL_DS_BARRIER_TYPE_WAIT      = 2,
};
/* Driver-independent bitmask of the stages a barrier signals from or waits
 * on. Each enumerator is a single bit so several stages can be OR'ed
 * together.
 */
enum intel_ds_stages {
INTEL_DS_STAGES_TOP_BIT = BITFIELD_BIT(0),
INTEL_DS_STAGES_GEOM_BIT = BITFIELD_BIT(1),
INTEL_DS_STAGES_RASTER_BIT = BITFIELD_BIT(2),
INTEL_DS_STAGES_DEPTH_BIT = BITFIELD_BIT(3),
INTEL_DS_STAGES_PIXEL_BIT = BITFIELD_BIT(4),
INTEL_DS_STAGES_COLOR_BIT = BITFIELD_BIT(5),
INTEL_DS_STAGES_GPGPU_BIT = BITFIELD_BIT(6),
};
enum intel_ds_tracepoint_flags {
/**
* Whether the tracepoint's timestamp must be recorded with as an
@ -85,6 +101,9 @@ enum intel_ds_tracepoint_flags {
/* Convert internal driver PIPE_CONTROL stall bits to intel_ds_stall_flag.
 *
 * The callback receives the driver's 32-bit flag mask and returns the
 * corresponding enum intel_ds_stall_flag value.
 */
typedef enum intel_ds_stall_flag (*intel_ds_stall_cb_t)(uint32_t flags);
/* Convert internal driver RESOURCE_BARRIER stage bits to intel_ds_stages.
 *
 * The callback receives the driver's 8-bit stage mask and returns the
 * corresponding enum intel_ds_stages value.
 */
typedef enum intel_ds_stages (*intel_ds_stages_cb_t)(uint8_t stages);
enum intel_ds_queue_stage {
INTEL_DS_QUEUE_STAGE_QUEUE,
INTEL_DS_QUEUE_STAGE_FRAME,

View file

@ -272,10 +272,46 @@ def define_tracepoints(args):
bits.append(Arg(type='bool', name=a[1], var='__entry->flags & INTEL_DS_{0}_BIT'.format(a[0]), c_format='%u'))
return bits
def stall_args(stall_bits):
    """Build the print arguments for a stall tracepoint.

    stall_bits is a list of [FLAG, label] pairs. The result is a list whose
    first element is a printf-style format string and whose remaining
    elements are the C expressions feeding each conversion: one per flag
    (yielding "+label" when INTEL_DS_<FLAG>_BIT is set in __entry->flags,
    otherwise ""), followed by the expressions for the up-to-four reason
    strings, which are separated by "; " and default to "unknown" when the
    first reason is missing.
    """
    fmt = ''
    exprs = []
    for a in stall_bits:
        fmt += '%s'
        exprs.append('(__entry->flags & INTEL_DS_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1]))
    fmt += ' : %s%s%s%s%s%s%s'
    exprs.append('(__entry->reason1) ? __entry->reason1 : "unknown"')
    exprs.append('(__entry->reason2) ? "; " : ""')
    exprs.append('(__entry->reason2) ? __entry->reason2 : ""')
    exprs.append('(__entry->reason3) ? "; " : ""')
    exprs.append('(__entry->reason3) ? __entry->reason3 : ""')
    exprs.append('(__entry->reason4) ? "; " : ""')
    exprs.append('(__entry->reason4) ? __entry->reason4 : ""')
    # To printout flags
    # fmt += '(0x%08x)'
    # exprs.append('__entry->flags')
    return [fmt] + exprs
def barrier_args(stage_bits, stall_bits):
fmt = ''
exprs = []
fmt += '%s '
exprs.append('(__entry->type == INTEL_DS_BARRIER_TYPE_IMMEDIATE) ? "IMMEDIATE" : ' +
'(__entry->type == INTEL_DS_BARRIER_TYPE_SIGNAL) ? "SIGNAL" : ' +
'(__entry->type == INTEL_DS_BARRIER_TYPE_WAIT) ? "WAIT" : "unknown"')
for a in stages_bits:
fmt += '%s'
exprs.append('(__entry->signal_stages & INTEL_DS_STAGES_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1]))
fmt += '->'
for a in stages_bits:
fmt += '%s'
exprs.append('(__entry->wait_stages & INTEL_DS_STAGES_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1]))
fmt += ': '
for a in stall_bits:
fmt += '%s'
exprs.append('(__entry->flags & INTEL_DS_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1]))
fmt += ' : %s%s%s%s%s%s%s'
@ -311,6 +347,37 @@ def define_tracepoints(args):
['END_OF_PIPE', 'eop'],
['CCS_CACHE_FLUSH', 'ccs_flush']]
# Barrier stages as [ENUM_SUFFIX, label] pairs: the first element selects
# INTEL_DS_STAGES_<ENUM_SUFFIX>_BIT, the second is the "+label" text printed
# by barrier_args() when that bit is set.
stages_bits = [['TOP', 'top'],
['GEOM', 'geom'],
['RASTER', 'raster'],
['DEPTH', 'depth'],
['PIXEL', 'pixel'],
['COLOR', 'color'],
['GPGPU', 'gpgpu']]
# Begin/end tracepoint pair for barriers.
# tp_args lists the values handed to the tracepoint: the barrier type, the
# signal/wait stage masks with their decode callback, the flush flags with
# their decode callback, and up to four reason strings.
# tp_struct lists what gets stored: the stage masks and flags are stored
# after being passed through the decode callbacks (see the var= expressions).
begin_end_tp('barrier',
tp_args=[ArgStruct(type='uint8_t', var='type'),
ArgStruct(type='uint32_t', var='signal_stages'),
ArgStruct(type='uint32_t', var='wait_stages'),
ArgStruct(type='intel_ds_stages_cb_t', var='decode_stage_cb'),
ArgStruct(type='uint32_t', var='flags'),
ArgStruct(type='intel_ds_stall_cb_t', var='decode_flags_cb'),
ArgStruct(type='const char *', var='reason1'),
ArgStruct(type='const char *', var='reason2'),
ArgStruct(type='const char *', var='reason3'),
ArgStruct(type='const char *', var='reason4'),],
tp_struct=[Arg(type='uint8_t', name='type', var='type', c_format='0x%hhx'),
Arg(type='uint8_t', name='signal_stages', var='decode_stage_cb(signal_stages)', c_format='0x%hhx'),
Arg(type='uint8_t', name='wait_stages', var='decode_stage_cb(wait_stages)', c_format='0x%hhx'),
Arg(type='uint32_t', name='flags', var='decode_flags_cb(flags)', c_format='0x%x'),
Arg(type='const char *', name='reason1', var='reason1', c_format='%s'),
Arg(type='const char *', name='reason2', var='reason2', c_format='%s'),
Arg(type='const char *', name='reason3', var='reason3', c_format='%s'),
Arg(type='const char *', name='reason4', var='reason4', c_format='%s'),],
# Text form of the tracepoint: type, signal->wait stages, flags, reasons.
tp_print=barrier_args(stages_bits, stall_flags),
tp_default_enabled=False,
end_pipelined=False)
begin_end_tp('stall',
tp_args=[ArgStruct(type='uint32_t', var='flags'),
ArgStruct(type='intel_ds_stall_cb_t', var='decode_cb'),

View file

@ -1734,6 +1734,41 @@ anv_dump_rsc_barrier_body(const struct GENX(RESOURCE_BARRIER_BODY) body) {
mesa_log_stream_destroy(stream);
}
static inline enum intel_ds_stages
resource_barrier_stage_to_ds(uint8_t stages)
{
enum intel_ds_stages ret = 0;
u_foreach_bit(b, stages) {
switch (BITFIELD_BIT(b)) {
case RESOURCE_BARRIER_STAGE_TOP:
ret |= INTEL_DS_STAGES_TOP_BIT;
break;
case RESOURCE_BARRIER_STAGE_GEOM:
ret |= INTEL_DS_STAGES_GEOM_BIT;
break;
case RESOURCE_BARRIER_STAGE_RASTER:
ret |= INTEL_DS_STAGES_RASTER_BIT;
break;
case RESOURCE_BARRIER_STAGE_DEPTH:
ret |= INTEL_DS_STAGES_DEPTH_BIT;
break;
case RESOURCE_BARRIER_STAGE_PIXEL:
ret |= INTEL_DS_STAGES_PIXEL_BIT;
break;
case RESOURCE_BARRIER_STAGE_COLOR:
ret |= INTEL_DS_STAGES_COLOR_BIT;
break;
case RESOURCE_BARRIER_STAGE_GPGPU:
ret |= INTEL_DS_STAGES_GPGPU_BIT;
break;
default:
UNREACHABLE("invalid barrier stage");
}
}
return ret;
}
ALWAYS_INLINE static enum GENX(RESOURCE_BARRIER_STAGE)
resource_barrier_signal_stage(enum intel_engine_class engine_class,
const VkPipelineStageFlags2 vk_stages)
@ -1928,6 +1963,8 @@ emit_resource_barrier(struct anv_batch *batch,
struct anv_address signal_addr,
struct anv_address wait_addr)
{
trace_intel_begin_barrier(batch->trace);
enum GENX(RESOURCE_BARRIER_STAGE) signal_stages =
resource_barrier_signal_stage(batch->engine_class, src_stages);
enum GENX(RESOURCE_BARRIER_STAGE) wait_stages =
@ -2034,6 +2071,20 @@ emit_resource_barrier(struct anv_batch *batch,
anv_dump_rsc_barrier_body(barrier.ResourceBarrierBody);
}
trace_intel_end_barrier(batch->trace,
barrier_type, signal_stages, wait_stages,
resource_barrier_stage_to_ds,
bits, anv_pipe_flush_bit_to_ds_stall_flag,
batch->pc_reasons[0],
batch->pc_reasons[1],
batch->pc_reasons[2],
batch->pc_reasons[3]);
batch->pc_reasons[0] = NULL;
batch->pc_reasons[1] = NULL;
batch->pc_reasons[2] = NULL;
batch->pc_reasons[3] = NULL;
batch->pc_reasons_count = 0;
}
#endif /* GFX_VER >= 20 */