intel/perf: Add support for LNL OA sample format size

The LNL OA sample format is 576 bytes long, whereas previous platforms
used 256 bytes, so we now need a function that returns the OA sample
format size for the current device.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29529>
This commit is contained in:
José Roberto de Souza 2024-05-23 14:07:05 -07:00 committed by Marge Bot
parent 18775827bd
commit f684f4efb0
7 changed files with 41 additions and 28 deletions

View file

@ -221,12 +221,15 @@ i915_oa_metrics_available(struct intel_perf_config *perf, int fd, bool use_regis
}
int
i915_perf_stream_read_samples(int perf_stream_fd, uint8_t *buffer,
i915_perf_stream_read_samples(struct intel_perf_config *perf_config,
int perf_stream_fd, uint8_t *buffer,
size_t buffer_len)
{
const size_t sample_header_size = perf_config->oa_sample_size +
sizeof(struct intel_perf_record_header);
int len;
if (buffer_len < INTEL_PERF_OA_HEADER_SAMPLE_SIZE)
if (buffer_len < sample_header_size)
return -ENOSPC;
do {

View file

@ -19,7 +19,7 @@ int i915_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
uint32_t ctx_id, uint64_t metrics_set_id,
uint64_t report_format, uint64_t period_exponent,
bool hold_preemption, bool enable);
int i915_perf_stream_read_samples(int perf_stream_fd, uint8_t *buffer, size_t buffer_len);
int i915_perf_stream_read_samples(struct intel_perf_config *perf_config, int perf_stream_fd, uint8_t *buffer, size_t buffer_len);
int i915_perf_stream_set_state(int perf_stream_fd, bool enable);
int i915_perf_stream_set_metrics_id(int perf_stream_fd, uint64_t metrics_set_id);

View file

@ -1445,7 +1445,7 @@ intel_perf_init_query_fields(struct intel_perf_config *perf_cfg,
MAX_QUERY_FIELDS(devinfo));
add_query_register(perf_cfg, INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC,
0, 256, 0);
0, perf_cfg->oa_sample_size, 0);
if (use_register_snapshots) {
if (devinfo->ver <= 11) {
@ -1520,6 +1520,15 @@ intel_perf_init_query_fields(struct intel_perf_config *perf_cfg,
layout->size = align(layout->size, 64);
}
/**
 * Return the size in bytes of one OA sample for the given device.
 *
 * Devices whose verx10 is 200 or higher use 576-byte samples; every other
 * device uses 256-byte samples.
 */
static size_t
intel_perf_get_oa_format_size(const struct intel_device_info *devinfo)
{
   return devinfo->verx10 >= 200 ? 576 : 256;
}
void
intel_perf_init_metrics(struct intel_perf_config *perf_cfg,
const struct intel_device_info *devinfo,
@ -1528,6 +1537,8 @@ intel_perf_init_metrics(struct intel_perf_config *perf_cfg,
bool use_register_snapshots)
{
perf_cfg->devinfo = devinfo;
perf_cfg->oa_sample_size = intel_perf_get_oa_format_size(devinfo);
intel_perf_init_query_fields(perf_cfg, devinfo, use_register_snapshots);
if (include_pipeline_statistics) {
@ -1608,9 +1619,9 @@ intel_perf_stream_read_samples(struct intel_perf_config *perf_config,
{
switch (perf_config->devinfo->kmd_type) {
case INTEL_KMD_TYPE_I915:
return i915_perf_stream_read_samples(perf_stream_fd, buffer, buffer_len);
return i915_perf_stream_read_samples(perf_config, perf_stream_fd, buffer, buffer_len);
case INTEL_KMD_TYPE_XE:
return xe_perf_stream_read_samples(perf_stream_fd, buffer, buffer_len);
return xe_perf_stream_read_samples(perf_config, perf_stream_fd, buffer, buffer_len);
default:
unreachable("missing");
return -1;

View file

@ -125,13 +125,6 @@ struct intel_pipeline_stat {
#define STATS_BO_END_OFFSET_BYTES (STATS_BO_SIZE / 2)
#define MAX_STAT_COUNTERS (STATS_BO_END_OFFSET_BYTES / 8)
/* Up to now all platforms use the same sample size */
#define INTEL_PERF_OA_SAMPLE_SIZE 256
/* header + sample */
#define INTEL_PERF_OA_HEADER_SAMPLE_SIZE (sizeof(struct intel_perf_record_header) + \
INTEL_PERF_OA_SAMPLE_SIZE)
struct intel_perf_query_result {
/**
* Storage for the final accumulated OA counters.
@ -360,6 +353,7 @@ struct intel_perf_config {
int n_counters;
struct intel_perf_query_field_layout query_layout;
size_t oa_sample_size;
/* Variables referenced in the XML meta data for OA performance
* counters, e.g in the normalization equations.

View file

@ -163,10 +163,12 @@ struct oa_sample_buf {
struct exec_node link;
int refcount;
int len;
uint8_t buf[INTEL_PERF_OA_HEADER_SAMPLE_SIZE * 10];
uint32_t last_timestamp;
uint8_t buf[];
};
/* Byte length of the trailing oa_sample_buf::buf storage: ten times the
 * OA sample size recorded in the perf config.
 *
 * The argument is parenthesized so that any pointer-valued expression
 * (for example &cfg or ctx->perf) expands with the intended precedence.
 *
 * NOTE(review): the stream readers size one record as oa_sample_size plus
 * sizeof(struct intel_perf_record_header); this length does not include the
 * header, so fewer than ten full records fit -- confirm that is intended.
 */
#define oa_sample_buf_buf_length(perf) ((perf)->oa_sample_size * 10)
/**
* gen representation of a performance query object.
*
@ -418,7 +420,7 @@ get_free_sample_buf(struct intel_perf_context *perf_ctx)
if (node)
buf = exec_node_data(struct oa_sample_buf, node, link);
else {
buf = ralloc_size(perf_ctx->perf, sizeof(*buf));
buf = ralloc_size(perf_ctx->perf, sizeof(*buf) + oa_sample_buf_buf_length(perf_ctx->perf));
exec_node_init(&buf->link);
buf->refcount = 0;
@ -973,7 +975,8 @@ read_oa_samples_until(struct intel_perf_context *perf_ctx,
len = intel_perf_stream_read_samples(perf_ctx->perf,
perf_ctx->oa_stream_fd,
buf->buf, sizeof(buf->buf));
buf->buf,
oa_sample_buf_buf_length(perf_ctx->perf));
if (len <= 0) {
exec_list_push_tail(&perf_ctx->free_sample_buffers, &buf->link);

View file

@ -211,15 +211,17 @@ xe_perf_stream_read_error(int perf_stream_fd, uint8_t *buffer, size_t buffer_len
}
int
xe_perf_stream_read_samples(int perf_stream_fd, uint8_t *buffer,
size_t buffer_len)
xe_perf_stream_read_samples(struct intel_perf_config *perf_config, int perf_stream_fd,
uint8_t *buffer, size_t buffer_len)
{
uint32_t num_samples = buffer_len / INTEL_PERF_OA_HEADER_SAMPLE_SIZE;
const size_t max_bytes_read = num_samples * INTEL_PERF_OA_SAMPLE_SIZE;
const size_t sample_size = perf_config->oa_sample_size;
const size_t sample_header_size = sample_size + sizeof(struct intel_perf_record_header);
uint32_t num_samples = buffer_len / sample_header_size;
const size_t max_bytes_read = num_samples * sample_size;
uint8_t *offset, *offset_samples;
int len, i;
if (buffer_len < INTEL_PERF_OA_HEADER_SAMPLE_SIZE)
if (buffer_len < sample_header_size)
return -ENOSPC;
do {
@ -233,7 +235,7 @@ xe_perf_stream_read_samples(int perf_stream_fd, uint8_t *buffer,
return len < 0 ? -errno : 0;
}
num_samples = len / INTEL_PERF_OA_SAMPLE_SIZE;
num_samples = len / sample_size;
offset = buffer;
offset_samples = buffer + (buffer_len - len);
/* move all samples to the end of buffer */
@ -246,12 +248,12 @@ xe_perf_stream_read_samples(int perf_stream_fd, uint8_t *buffer,
/* TODO: also append REPORT_LOST and BUFFER_LOST */
header->type = INTEL_PERF_RECORD_TYPE_SAMPLE;
header->pad = 0;
header->size = INTEL_PERF_OA_HEADER_SAMPLE_SIZE;
header->size = sample_header_size;
offset += sizeof(*header);
memmove(offset, offset_samples, INTEL_PERF_OA_SAMPLE_SIZE);
offset += INTEL_PERF_OA_SAMPLE_SIZE;
offset_samples += INTEL_PERF_OA_SAMPLE_SIZE;
memmove(offset, offset_samples, sample_size);
offset += sample_size;
offset_samples += sample_size;
}
return offset - buffer;

View file

@ -25,5 +25,5 @@ int xe_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
bool hold_preemption, bool enable);
int xe_perf_stream_set_state(int perf_stream_fd, bool enable);
int xe_perf_stream_set_metrics_id(int perf_stream_fd, uint64_t metrics_set_id);
int xe_perf_stream_read_samples(int perf_stream_fd, uint8_t *buffer,
size_t buffer_len);
int xe_perf_stream_read_samples(struct intel_perf_config *perf_config, int perf_stream_fd,
uint8_t *buffer, size_t buffer_len);