mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
anv: instrument resource barriers instruction in u_trace
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38707>
This commit is contained in:
parent
3520abf8a3
commit
15174b185b
4 changed files with 263 additions and 28 deletions
|
|
@ -307,42 +307,112 @@ end_event(struct intel_ds_queue *queue, uint64_t ts_ns,
|
|||
stage->start_ns[level] = 0;
|
||||
}
|
||||
|
||||
static size_t
|
||||
snprintf_stages(char *buf, size_t buf_size,
|
||||
enum intel_ds_barrier_type type,
|
||||
enum intel_ds_stages signal_stages,
|
||||
enum intel_ds_stages wait_stages)
|
||||
{
|
||||
return
|
||||
snprintf(buf, buf_size, "%s: %s%s%s%s%s%s%s->%s%s%s%s%s%s%s: ",
|
||||
type == INTEL_DS_BARRIER_TYPE_IMMEDIATE ? "imm" :
|
||||
type == INTEL_DS_BARRIER_TYPE_SIGNAL ? "signal" :
|
||||
type == INTEL_DS_BARRIER_TYPE_WAIT ? "wait" : "unknown",
|
||||
(signal_stages & INTEL_DS_STAGES_TOP_BIT) ? "+top" : "",
|
||||
(signal_stages & INTEL_DS_STAGES_GEOM_BIT) ? "+geom" : "",
|
||||
(signal_stages & INTEL_DS_STAGES_RASTER_BIT) ? "+rast" : "",
|
||||
(signal_stages & INTEL_DS_STAGES_DEPTH_BIT) ? "+ds" : "",
|
||||
(signal_stages & INTEL_DS_STAGES_PIXEL_BIT) ? "+pix" : "",
|
||||
(signal_stages & INTEL_DS_STAGES_COLOR_BIT) ? "+col" : "",
|
||||
(signal_stages & INTEL_DS_STAGES_GPGPU_BIT) ? "+cs" : "",
|
||||
(wait_stages & INTEL_DS_STAGES_TOP_BIT) ? "+top" : "",
|
||||
(wait_stages & INTEL_DS_STAGES_GEOM_BIT) ? "+geom" : "",
|
||||
(wait_stages & INTEL_DS_STAGES_RASTER_BIT) ? "+rast" : "",
|
||||
(wait_stages & INTEL_DS_STAGES_DEPTH_BIT) ? "+ds" : "",
|
||||
(wait_stages & INTEL_DS_STAGES_PIXEL_BIT) ? "+pix" : "",
|
||||
(wait_stages & INTEL_DS_STAGES_COLOR_BIT) ? "+col" : "",
|
||||
(wait_stages & INTEL_DS_STAGES_GPGPU_BIT) ? "+cs" : "");
|
||||
}
|
||||
|
||||
static size_t
|
||||
snprintf_flags(char *buf, size_t buf_size, enum intel_ds_stall_flag bits)
|
||||
{
|
||||
return
|
||||
snprintf(buf, buf_size, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
(bits & INTEL_DS_DEPTH_CACHE_FLUSH_BIT) ? "+depth_flush" : "",
|
||||
(bits & INTEL_DS_DATA_CACHE_FLUSH_BIT) ? "+dc_flush" : "",
|
||||
(bits & INTEL_DS_HDC_PIPELINE_FLUSH_BIT) ? "+hdc_flush" : "",
|
||||
(bits & INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT) ? "+rt_flush" : "",
|
||||
(bits & INTEL_DS_TILE_CACHE_FLUSH_BIT) ? "+tile_flush" : "",
|
||||
(bits & INTEL_DS_L3_FABRIC_FLUSH_BIT) ? "+l3_fabric_flush" : "",
|
||||
(bits & INTEL_DS_STATE_CACHE_INVALIDATE_BIT) ? "+state_inv" : "",
|
||||
(bits & INTEL_DS_CONST_CACHE_INVALIDATE_BIT) ? "+const_inv" : "",
|
||||
(bits & INTEL_DS_VF_CACHE_INVALIDATE_BIT) ? "+vf_inv" : "",
|
||||
(bits & INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT) ? "+tex_inv" : "",
|
||||
(bits & INTEL_DS_INST_CACHE_INVALIDATE_BIT) ? "+inst_inv" : "",
|
||||
(bits & INTEL_DS_STALL_AT_SCOREBOARD_BIT) ? "+pb_stall" : "",
|
||||
(bits & INTEL_DS_DEPTH_STALL_BIT) ? "+depth_stall" : "",
|
||||
(bits & INTEL_DS_CS_STALL_BIT) ? "+cs_stall" : "",
|
||||
(bits & INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) ? "+udp_flush" : "",
|
||||
(bits & INTEL_DS_END_OF_PIPE_BIT) ? "+eop" : "",
|
||||
(bits & INTEL_DS_CCS_CACHE_FLUSH_BIT) ? "+ccs_flush" : "");
|
||||
}
|
||||
|
||||
/**
 * Format up to four reason strings into buf as
 * ": <r1>[; <r2>][; <r3>][; <r4>]".
 *
 * A NULL r1 is printed as "unknown"; NULL r2..r4 are skipped together with
 * their "; " separator.
 *
 * \param buf       destination buffer
 * \param buf_size  number of bytes available at buf
 * \param r1..r4    reason strings, each may be NULL
 *
 * \return the number of characters actually stored in buf, excluding the
 * terminating NUL. Unlike raw snprintf(), the value never reaches buf_size,
 * so callers that accumulate it as an offset cannot run past the buffer or
 * underflow the remaining size even when the reasons get truncated.
 */
static size_t
snprintf_reasons(char *buf, size_t buf_size,
                 const char *r1, const char *r2,
                 const char *r3, const char *r4)
{
   const int len =
      snprintf(buf, buf_size, ": %s%s%s%s%s%s%s",
               r1 ? r1 : "unknown",
               r2 ? "; " : "", r2 ? r2 : "",
               r3 ? "; " : "", r3 ? r3 : "",
               r4 ? "; " : "", r4 ? r4 : "");

   /* snprintf() reports an error with a negative value; leave an empty
    * string behind rather than returning a huge size_t.
    */
   if (len < 0) {
      if (buf_size > 0)
         buf[0] = '\0';
      return 0;
   }

   if (buf_size == 0)
      return 0;

   /* On truncation snprintf() returns the length it wanted to write; only
    * buf_size - 1 characters were stored.
    */
   return (size_t)len < buf_size ? (size_t)len : buf_size - 1;
}
|
||||
|
||||
static void
|
||||
custom_trace_payload_as_extra_end_stall(perfetto::protos::pbzero::GpuRenderStageEvent *event,
|
||||
const struct trace_intel_end_stall *payload)
|
||||
{
|
||||
char buf[256];
|
||||
size_t buf_size = 0;
|
||||
|
||||
{
|
||||
auto data = event->add_extra_data();
|
||||
data->set_name("stall_reason");
|
||||
data->set_name("reason");
|
||||
|
||||
snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s : %s%s%s%s%s%s%s",
|
||||
(payload->flags & INTEL_DS_DEPTH_CACHE_FLUSH_BIT) ? "+depth_flush" : "",
|
||||
(payload->flags & INTEL_DS_DATA_CACHE_FLUSH_BIT) ? "+dc_flush" : "",
|
||||
(payload->flags & INTEL_DS_HDC_PIPELINE_FLUSH_BIT) ? "+hdc_flush" : "",
|
||||
(payload->flags & INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT) ? "+rt_flush" : "",
|
||||
(payload->flags & INTEL_DS_TILE_CACHE_FLUSH_BIT) ? "+tile_flush" : "",
|
||||
(payload->flags & INTEL_DS_L3_FABRIC_FLUSH_BIT) ? "+l3_fabric_flush" : "",
|
||||
(payload->flags & INTEL_DS_STATE_CACHE_INVALIDATE_BIT) ? "+state_inv" : "",
|
||||
(payload->flags & INTEL_DS_CONST_CACHE_INVALIDATE_BIT) ? "+const_inv" : "",
|
||||
(payload->flags & INTEL_DS_VF_CACHE_INVALIDATE_BIT) ? "+vf_inv" : "",
|
||||
(payload->flags & INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT) ? "+tex_inv" : "",
|
||||
(payload->flags & INTEL_DS_INST_CACHE_INVALIDATE_BIT) ? "+inst_inv" : "",
|
||||
(payload->flags & INTEL_DS_STALL_AT_SCOREBOARD_BIT) ? "+pb_stall" : "",
|
||||
(payload->flags & INTEL_DS_DEPTH_STALL_BIT) ? "+depth_stall" : "",
|
||||
(payload->flags & INTEL_DS_CS_STALL_BIT) ? "+cs_stall" : "",
|
||||
(payload->flags & INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) ? "+udp_flush" : "",
|
||||
(payload->flags & INTEL_DS_END_OF_PIPE_BIT) ? "+eop" : "",
|
||||
(payload->flags & INTEL_DS_CCS_CACHE_FLUSH_BIT) ? "+ccs_flush" : "",
|
||||
(payload->reason1) ? payload->reason1 : "unknown",
|
||||
(payload->reason2) ? "; " : "",
|
||||
(payload->reason2) ? payload->reason2 : "",
|
||||
(payload->reason3) ? "; " : "",
|
||||
(payload->reason3) ? payload->reason3 : "",
|
||||
(payload->reason4) ? "; " : "",
|
||||
(payload->reason4) ? payload->reason4 : "");
|
||||
buf_size += snprintf_flags(buf + buf_size, sizeof(buf) - buf_size,
|
||||
(enum intel_ds_stall_flag) payload->flags);
|
||||
buf_size += snprintf_reasons(buf + buf_size, sizeof(buf) - buf_size,
|
||||
payload->reason1, payload->reason2,
|
||||
payload->reason3, payload->reason4);
|
||||
assert(strlen(buf) > 0);
|
||||
|
||||
data->set_value(buf);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
custom_trace_payload_as_extra_end_barrier(perfetto::protos::pbzero::GpuRenderStageEvent *event,
|
||||
const struct trace_intel_end_barrier *payload)
|
||||
{
|
||||
char buf[256];
|
||||
size_t buf_size = 0;
|
||||
|
||||
{
|
||||
auto data = event->add_extra_data();
|
||||
data->set_name("reason");
|
||||
|
||||
buf_size += snprintf_stages(buf + buf_size, sizeof(buf) - buf_size,
|
||||
(enum intel_ds_barrier_type) payload->type,
|
||||
(enum intel_ds_stages) payload->signal_stages,
|
||||
(enum intel_ds_stages) payload->wait_stages);
|
||||
buf_size += snprintf_flags(buf + buf_size, sizeof(buf) - buf_size,
|
||||
(enum intel_ds_stall_flag) payload->flags);
|
||||
buf_size += snprintf_reasons(buf + buf_size, sizeof(buf) - buf_size,
|
||||
payload->reason1, payload->reason2,
|
||||
payload->reason3, payload->reason4);
|
||||
assert(strlen(buf) > 0);
|
||||
|
||||
data->set_value(buf);
|
||||
|
|
@ -514,6 +584,34 @@ intel_ds_end_stall(struct intel_ds_device *device,
|
|||
(trace_payload_as_extra_func)custom_trace_payload_as_extra_end_stall);
|
||||
}
|
||||
|
||||
void
|
||||
intel_ds_begin_barrier(struct intel_ds_device *device,
|
||||
uint64_t ts_ns,
|
||||
uint16_t tp_idx,
|
||||
const void *flush_data,
|
||||
const struct trace_intel_begin_barrier *payload,
|
||||
const void *indirect_data)
|
||||
{
|
||||
const struct intel_ds_flush_data *flush =
|
||||
(const struct intel_ds_flush_data *) flush_data;
|
||||
begin_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_STALL);
|
||||
}
|
||||
|
||||
void
|
||||
intel_ds_end_barrier(struct intel_ds_device *device,
|
||||
uint64_t ts_ns,
|
||||
uint16_t tp_idx,
|
||||
const void *flush_data,
|
||||
const struct trace_intel_end_barrier *payload,
|
||||
const void *indirect_data)
|
||||
{
|
||||
const struct intel_ds_flush_data *flush =
|
||||
(const struct intel_ds_flush_data *) flush_data;
|
||||
end_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_STALL,
|
||||
flush->submission_id, tp_idx, NULL, payload, indirect_data,
|
||||
(trace_payload_as_extra_func)custom_trace_payload_as_extra_end_barrier);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
intel_ds_begin_submit(struct intel_ds_queue *queue)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -65,6 +65,22 @@ enum intel_ds_stall_flag {
|
|||
INTEL_DS_L3_FABRIC_FLUSH_BIT = BITFIELD_BIT(17),
|
||||
};
|
||||
|
||||
/* Kind of barrier carried by the barrier tracepoint. */
enum intel_ds_barrier_type {
   INTEL_DS_BARRIER_TYPE_IMMEDIATE = 0,
   INTEL_DS_BARRIER_TYPE_SIGNAL    = 1,
   INTEL_DS_BARRIER_TYPE_WAIT      = 2,
};
|
||||
|
||||
enum intel_ds_stages {
|
||||
INTEL_DS_STAGES_TOP_BIT = BITFIELD_BIT(0),
|
||||
INTEL_DS_STAGES_GEOM_BIT = BITFIELD_BIT(1),
|
||||
INTEL_DS_STAGES_RASTER_BIT = BITFIELD_BIT(2),
|
||||
INTEL_DS_STAGES_DEPTH_BIT = BITFIELD_BIT(3),
|
||||
INTEL_DS_STAGES_PIXEL_BIT = BITFIELD_BIT(4),
|
||||
INTEL_DS_STAGES_COLOR_BIT = BITFIELD_BIT(5),
|
||||
INTEL_DS_STAGES_GPGPU_BIT = BITFIELD_BIT(6),
|
||||
};
|
||||
|
||||
enum intel_ds_tracepoint_flags {
|
||||
/**
|
||||
* Whether the tracepoint's timestamp must be recorded with as an
|
||||
|
|
@ -85,6 +101,9 @@ enum intel_ds_tracepoint_flags {
|
|||
/* Convert internal driver PIPE_CONTROL stall bits to intel_ds_stall_flag. */
|
||||
typedef enum intel_ds_stall_flag (*intel_ds_stall_cb_t)(uint32_t flags);
|
||||
|
||||
/* Convert internal driver RESOURCE_BARRIER stage bits to intel_ds_stages. */
|
||||
typedef enum intel_ds_stages (*intel_ds_stages_cb_t)(uint8_t stages);
|
||||
|
||||
enum intel_ds_queue_stage {
|
||||
INTEL_DS_QUEUE_STAGE_QUEUE,
|
||||
INTEL_DS_QUEUE_STAGE_FRAME,
|
||||
|
|
|
|||
|
|
@ -272,10 +272,46 @@ def define_tracepoints(args):
|
|||
bits.append(Arg(type='bool', name=a[1], var='__entry->flags & INTEL_DS_{0}_BIT'.format(a[0]), c_format='%u'))
|
||||
return bits
|
||||
|
||||
def stall_args(stall_bits):
    """Build the print description for the stall tracepoint.

    stall_bits is a sequence of [FLAG_NAME, label] pairs. For each pair a
    '%s' is added to the format string, paired with a C expression that
    yields "+label" when INTEL_DS_<FLAG_NAME>_BIT is set in __entry->flags
    and "" otherwise. The flags are followed by ' : ' and the four reason
    strings, separated by "; "; a NULL reason1 prints as "unknown".

    Returns a list whose first element is the format string and whose
    remaining elements are the C expressions, in format order.
    """
    exprs = ['(__entry->flags & INTEL_DS_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1])
             for a in stall_bits]
    fmt = '%s' * len(exprs)

    fmt += ' : %s%s%s%s%s%s%s'
    exprs.append('(__entry->reason1) ? __entry->reason1 : "unknown"')
    for idx in (2, 3, 4):
        exprs.append('(__entry->reason{0}) ? "; " : ""'.format(idx))
        exprs.append('(__entry->reason{0}) ? __entry->reason{0} : ""'.format(idx))

    # To also print the raw flags value, append '(0x%08x)' to fmt and
    # '__entry->flags' to exprs.
    return [fmt] + exprs
|
||||
|
||||
def barrier_args(stage_bits, stall_bits):
|
||||
fmt = ''
|
||||
exprs = []
|
||||
|
||||
fmt += '%s '
|
||||
exprs.append('(__entry->type == INTEL_DS_BARRIER_TYPE_IMMEDIATE) ? "IMMEDIATE" : ' +
|
||||
'(__entry->type == INTEL_DS_BARRIER_TYPE_SIGNAL) ? "SIGNAL" : ' +
|
||||
'(__entry->type == INTEL_DS_BARRIER_TYPE_WAIT) ? "WAIT" : "unknown"')
|
||||
|
||||
for a in stages_bits:
|
||||
fmt += '%s'
|
||||
exprs.append('(__entry->signal_stages & INTEL_DS_STAGES_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1]))
|
||||
fmt += '->'
|
||||
for a in stages_bits:
|
||||
fmt += '%s'
|
||||
exprs.append('(__entry->wait_stages & INTEL_DS_STAGES_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1]))
|
||||
fmt += ': '
|
||||
|
||||
for a in stall_bits:
|
||||
fmt += '%s'
|
||||
exprs.append('(__entry->flags & INTEL_DS_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1]))
|
||||
fmt += ' : %s%s%s%s%s%s%s'
|
||||
|
|
@ -311,6 +347,37 @@ def define_tracepoints(args):
|
|||
['END_OF_PIPE', 'eop'],
|
||||
['CCS_CACHE_FLUSH', 'ccs_flush']]
|
||||
|
||||
stages_bits = [['TOP', 'top'],
|
||||
['GEOM', 'geom'],
|
||||
['RASTER', 'raster'],
|
||||
['DEPTH', 'depth'],
|
||||
['PIXEL', 'pixel'],
|
||||
['COLOR', 'color'],
|
||||
['GPGPU', 'gpgpu']]
|
||||
|
||||
begin_end_tp('barrier',
|
||||
tp_args=[ArgStruct(type='uint8_t', var='type'),
|
||||
ArgStruct(type='uint32_t', var='signal_stages'),
|
||||
ArgStruct(type='uint32_t', var='wait_stages'),
|
||||
ArgStruct(type='intel_ds_stages_cb_t', var='decode_stage_cb'),
|
||||
ArgStruct(type='uint32_t', var='flags'),
|
||||
ArgStruct(type='intel_ds_stall_cb_t', var='decode_flags_cb'),
|
||||
ArgStruct(type='const char *', var='reason1'),
|
||||
ArgStruct(type='const char *', var='reason2'),
|
||||
ArgStruct(type='const char *', var='reason3'),
|
||||
ArgStruct(type='const char *', var='reason4'),],
|
||||
tp_struct=[Arg(type='uint8_t', name='type', var='type', c_format='0x%hhx'),
|
||||
Arg(type='uint8_t', name='signal_stages', var='decode_stage_cb(signal_stages)', c_format='0x%hhx'),
|
||||
Arg(type='uint8_t', name='wait_stages', var='decode_stage_cb(wait_stages)', c_format='0x%hhx'),
|
||||
Arg(type='uint32_t', name='flags', var='decode_flags_cb(flags)', c_format='0x%x'),
|
||||
Arg(type='const char *', name='reason1', var='reason1', c_format='%s'),
|
||||
Arg(type='const char *', name='reason2', var='reason2', c_format='%s'),
|
||||
Arg(type='const char *', name='reason3', var='reason3', c_format='%s'),
|
||||
Arg(type='const char *', name='reason4', var='reason4', c_format='%s'),],
|
||||
tp_print=barrier_args(stages_bits, stall_flags),
|
||||
tp_default_enabled=False,
|
||||
end_pipelined=False)
|
||||
|
||||
begin_end_tp('stall',
|
||||
tp_args=[ArgStruct(type='uint32_t', var='flags'),
|
||||
ArgStruct(type='intel_ds_stall_cb_t', var='decode_cb'),
|
||||
|
|
|
|||
|
|
@ -1734,6 +1734,41 @@ anv_dump_rsc_barrier_body(const struct GENX(RESOURCE_BARRIER_BODY) body) {
|
|||
mesa_log_stream_destroy(stream);
|
||||
}
|
||||
|
||||
static inline enum intel_ds_stages
|
||||
resource_barrier_stage_to_ds(uint8_t stages)
|
||||
{
|
||||
enum intel_ds_stages ret = 0;
|
||||
|
||||
u_foreach_bit(b, stages) {
|
||||
switch (BITFIELD_BIT(b)) {
|
||||
case RESOURCE_BARRIER_STAGE_TOP:
|
||||
ret |= INTEL_DS_STAGES_TOP_BIT;
|
||||
break;
|
||||
case RESOURCE_BARRIER_STAGE_GEOM:
|
||||
ret |= INTEL_DS_STAGES_GEOM_BIT;
|
||||
break;
|
||||
case RESOURCE_BARRIER_STAGE_RASTER:
|
||||
ret |= INTEL_DS_STAGES_RASTER_BIT;
|
||||
break;
|
||||
case RESOURCE_BARRIER_STAGE_DEPTH:
|
||||
ret |= INTEL_DS_STAGES_DEPTH_BIT;
|
||||
break;
|
||||
case RESOURCE_BARRIER_STAGE_PIXEL:
|
||||
ret |= INTEL_DS_STAGES_PIXEL_BIT;
|
||||
break;
|
||||
case RESOURCE_BARRIER_STAGE_COLOR:
|
||||
ret |= INTEL_DS_STAGES_COLOR_BIT;
|
||||
break;
|
||||
case RESOURCE_BARRIER_STAGE_GPGPU:
|
||||
ret |= INTEL_DS_STAGES_GPGPU_BIT;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("invalid barrier stage");
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE static enum GENX(RESOURCE_BARRIER_STAGE)
|
||||
resource_barrier_signal_stage(enum intel_engine_class engine_class,
|
||||
const VkPipelineStageFlags2 vk_stages)
|
||||
|
|
@ -1928,6 +1963,8 @@ emit_resource_barrier(struct anv_batch *batch,
|
|||
struct anv_address signal_addr,
|
||||
struct anv_address wait_addr)
|
||||
{
|
||||
trace_intel_begin_barrier(batch->trace);
|
||||
|
||||
enum GENX(RESOURCE_BARRIER_STAGE) signal_stages =
|
||||
resource_barrier_signal_stage(batch->engine_class, src_stages);
|
||||
enum GENX(RESOURCE_BARRIER_STAGE) wait_stages =
|
||||
|
|
@ -2034,6 +2071,20 @@ emit_resource_barrier(struct anv_batch *batch,
|
|||
|
||||
anv_dump_rsc_barrier_body(barrier.ResourceBarrierBody);
|
||||
}
|
||||
|
||||
trace_intel_end_barrier(batch->trace,
|
||||
barrier_type, signal_stages, wait_stages,
|
||||
resource_barrier_stage_to_ds,
|
||||
bits, anv_pipe_flush_bit_to_ds_stall_flag,
|
||||
batch->pc_reasons[0],
|
||||
batch->pc_reasons[1],
|
||||
batch->pc_reasons[2],
|
||||
batch->pc_reasons[3]);
|
||||
batch->pc_reasons[0] = NULL;
|
||||
batch->pc_reasons[1] = NULL;
|
||||
batch->pc_reasons[2] = NULL;
|
||||
batch->pc_reasons[3] = NULL;
|
||||
batch->pc_reasons_count = 0;
|
||||
}
|
||||
|
||||
#endif /* GFX_VER >= 20 */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue