diff --git a/src/freedreno/ds/fd_pps_driver.cc b/src/freedreno/ds/fd_pps_driver.cc index ad201d8f00c..5c3c5675409 100644 --- a/src/freedreno/ds/fd_pps_driver.cc +++ b/src/freedreno/ds/fd_pps_driver.cc @@ -7,9 +7,16 @@ #include #include -#include +#include +#include +#include + +#include + +#include "common/freedreno_common.h" #include "common/freedreno_dev_info.h" +#include "drm-uapi/msm_drm.h" #include "drm/freedreno_drmif.h" #include "drm/freedreno_ringbuffer.h" #include "perfcntrs/freedreno_dt.h" @@ -46,6 +53,8 @@ FreedrenoDriver::configure_counters(bool reset, bool wait) (enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE); struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags); + assert(io); /* This is legacy path only */ + for (const auto &countable : countables) countable.configure(ring, reset); @@ -67,12 +76,85 @@ FreedrenoDriver::configure_counters(bool reset, bool wait) void FreedrenoDriver::collect_countables() { + assert(io); /* This is legacy path only */ + last_dump_ts = gpu_timestamp(); for (const auto &countable : countables) countable.collect(); } +int +FreedrenoDriver::configure_counters_stream() +{ + if (perfcntr_stream_fd >= 0) { + close(perfcntr_stream_fd); + perfcntr_stream_fd = -1; + } + + unsigned sample_size = sizeof(uint64_t) * (2 + countables.size()); + unsigned bufsz = 2 * sample_size; + unsigned bufsz_shift = ffs(util_next_power_of_two(bufsz)) - 1; + + struct drm_msm_perfcntr_group groups[num_perfcntrs]; + memset(groups, 0, sizeof(groups)); + + struct drm_msm_perfcntr_config req = { + .flags = MSM_PERFCNTR_STREAM, + .groups = VOID2U64(groups), + .period = sampling_period_ns_, + .bufsz_shift = bufsz_shift, + .group_stride = sizeof(struct drm_msm_perfcntr_group), + }; + + assert(req.period); + + for (const auto &countable : countables) + countable.configure_stream(&req); + + /* Now that the groups are fully populated, resolve the sample indices: */ + for (const auto &countable : countables) + 
countable.resolve_sample_idx(&req); + + int fd = drmIoctl(fd_device_fd(dev), DRM_IOCTL_MSM_PERFCNTR_CONFIG, &req); + if (fd < 0) + return fd; + + sample_buf = malloc(sample_size); + + perfcntr_stream_fd = fd; + + /* Unlike the legacy path, the kernel handles reconfiguring counters + * after power collapse for us, so we won't need to configure the + * stream again. So cleanup allocated memory now: + */ + for (unsigned i = 0; i < num_perfcntrs; i++) { + if (!groups[i].countables) + break; + free(U642VOID(groups[i].countables)); + } + + return 0; +} + +static bool +perfcntr_stream_ready(int perfcntr_stream_fd) +{ + struct pollfd pfd; + + pfd.fd = perfcntr_stream_fd; + pfd.events = POLLIN; + pfd.revents = 0; + + if (poll(&pfd, 1, 0) < 0) + return false; + + if (!(pfd.revents & POLLIN)) + return false; + + return true; +} + static uint64_t ticks_to_ns(uint64_t ticks) { @@ -82,6 +164,61 @@ ticks_to_ns(uint64_t ticks) return ticks / GPU_TICKS_PER_NS; } +bool +FreedrenoDriver::collect_countables_stream() +{ + unsigned nsamples = 0; + bool discontinuity = false; + + assert(perfcntr_stream_fd >= 0); + + while (perfcntr_stream_ready(perfcntr_stream_fd)) { + unsigned sample_size = sizeof(uint64_t) * (2 + countables.size()); + size_t sz = sample_size; + void *ptr = sample_buf; + + while (sz > 0) { + ssize_t ret = read(perfcntr_stream_fd, ptr, sz); + + if (ret < 0) + ret = -errno; + + if (ret == -EINTR || ret == -EAGAIN) + continue; + + if (ret < 0) + errx(ret, "read failed"); + + sz -= ret; + ptr = static_cast(ptr) + ret; + } + + uint64_t *buf = (uint64_t *)sample_buf; + uint64_t ts = buf[0]; + uint32_t seqno = buf[1] & 0xffffffff; + + discontinuity = seqno == 0; + + /* Capture the timestamp from the *start* of the sampling period: */ + last_capture_ts = last_dump_ts; + last_dump_ts = ts; + + auto elapsed_time_ns = ticks_to_ns(last_dump_ts - last_capture_ts); + + time = (float)elapsed_time_ns / 1000000000.0; + + /* advance past header: */ + buf += 2; + + for (const auto 
&countable : countables) + countable.collect_stream(buf); + + nsamples++; + } + + return (nsamples > 0) && !discontinuity; +} + bool FreedrenoDriver::init_perfcnt() { @@ -107,9 +244,7 @@ FreedrenoDriver::init_perfcnt() has_suspend_count = true; } - fd_pipe_set_param(pipe, FD_SYSPROF, 1); - - perfcntrs = fd_perfcntrs(fd_pipe_dev_id(pipe), &num_perfcntrs); + perfcntrs = fd_perfcntrs(dev_id, &num_perfcntrs); if (num_perfcntrs == 0) { PERFETTO_FATAL("No hw counters available"); return false; @@ -137,12 +272,20 @@ FreedrenoDriver::init_perfcnt() for (const auto &countable : countables) countable.resolve(); + if (!configure_counters_stream()) { + close(perfcntr_stream_fd); + perfcntr_stream_fd = -1; + return true; + } + io = fd_dt_find_io(); if (!io) { PERFETTO_FATAL("Could not map GPU I/O space"); return false; } + fd_pipe_set_param(pipe, FD_SYSPROF, 1); + configure_counters(true, true); collect_countables(); @@ -165,14 +308,26 @@ FreedrenoDriver::enable_all_counters() } void -FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */) +FreedrenoDriver::enable_perfcnt(const uint64_t sampling_period_ns) { + sampling_period_ns_ = sampling_period_ns; + + if (!io) { + /* reconfigure counter stream: */ + configure_counters_stream(); + collect_countables_stream(); + } } bool FreedrenoDriver::dump_perfcnt() { - if (has_suspend_count) { + /* Note, when using perfcntr stream instead of mmio based counter + * reads, we can skip this (since the seqno in the data read from + * the stream will tell us if there is a discontinuity, and the + * kernel will handle reconfiguring counters on resume) + */ + if (has_suspend_count && io) { uint64_t val; fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val); @@ -193,6 +348,9 @@ FreedrenoDriver::dump_perfcnt() } } + if (!io) + return collect_countables_stream(); + auto last_ts = last_dump_ts; /* Capture the timestamp from the *start* of the sampling period: */ @@ -223,11 +381,13 @@ uint64_t FreedrenoDriver::next() return ret; } -void 
FreedrenoDriver::disable_perfcnt() +void +FreedrenoDriver::disable_perfcnt() { - /* There isn't really any disable, only reconfiguring which countables - * get muxed to which counters - */ + if (perfcntr_stream_fd >= 0) { + close(perfcntr_stream_fd); + perfcntr_stream_fd = -1; + } } /* @@ -278,6 +438,80 @@ FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset) co } } +void +FreedrenoDriver::Countable::configure_stream(struct drm_msm_perfcntr_config *req) const +{ + const struct fd_perfcntr_countable *countable = d->state[id].countable; + struct drm_msm_perfcntr_group *groups = + (struct drm_msm_perfcntr_group *)U642VOID(req->groups); + + /* Find group: */ + struct drm_msm_perfcntr_group *g = NULL; + + for (unsigned i = 0; i < req->nr_groups; i++) { + if (!strcmp(groups[i].group_name, group.c_str())) { + g = &groups[i]; + break; + } + } + + /* If not found, append a new group: */ + if (!g) { + g = &groups[req->nr_groups++]; + strcpy(g->group_name, group.c_str()); + + /* allocate countables for max # of counters in the group */ + for (unsigned i = 0; i < d->num_perfcntrs; i++) { + if (!strcmp(d->perfcntrs[i].name, group.c_str())) { + void *countables = calloc(sizeof(uint32_t), d->perfcntrs[i].num_counters); + g->countables = VOID2U64(countables); + break; + } + } + + assert(g->countables); + } + + /* Initially, just store the index within the group, since earlier groups + * are not yet fully populated (ie. 
we don't yet know the offset of the + * first sample in the group) + */ + d->state[id].idx = g->nr_countables; + + /* And last, append the countable: */ + uint32_t *countables = (uint32_t *)U642VOID(g->countables); + countables[g->nr_countables++] = countable->selector; +} + +static unsigned +find_group_offset(const struct drm_msm_perfcntr_config *req, const char *group) +{ + struct drm_msm_perfcntr_group *groups = + (struct drm_msm_perfcntr_group *)U642VOID(req->groups); + unsigned off = 0; + + for (unsigned i = 0; i < req->nr_groups; i++) { + if (!strcmp(groups[i].group_name, group)) + break; + off += groups[i].nr_countables; + } + + return off; +} + +void +FreedrenoDriver::Countable::resolve_sample_idx(const struct drm_msm_perfcntr_config *req) const +{ + d->state[id].idx += find_group_offset(req, group.c_str()); +} + +void +FreedrenoDriver::Countable::collect_stream(const uint64_t *buf) const +{ + d->state[id].last_value = d->state[id].value; + d->state[id].value = buf[d->state[id].idx]; +} + /* Collect current counter value and calculate delta since last sample: */ void FreedrenoDriver::Countable::collect() const diff --git a/src/freedreno/ds/fd_pps_driver.h b/src/freedreno/ds/fd_pps_driver.h index 81395714581..4da5cb6d808 100644 --- a/src/freedreno/ds/fd_pps_driver.h +++ b/src/freedreno/ds/fd_pps_driver.h @@ -6,6 +6,7 @@ #pragma once #include "pps/pps_driver.h" +#include "drm-uapi/msm_drm.h" extern "C" { struct fd_dev_id; @@ -54,10 +55,26 @@ private: const struct fd_dev_info *info; /** - * The memory mapped i/o space for counter readback: + * The memory mapped i/o space for counter readback (legacy): */ void *io; + /** + * perfcntr stream fd, if not using memory mapped i/o for counter + * readback. 
+ */ + int perfcntr_stream_fd = -1; + + /** + * The configured sampling period + */ + uint64_t sampling_period_ns_ = 1000000000; + + /** + * Buffer used to read samples + */ + void *sample_buf; + const struct fd_perfcntr_group *perfcntrs; unsigned num_perfcntrs; @@ -79,6 +96,9 @@ private: void configure_counters(bool reset, bool wait); void collect_countables(); + int configure_counters_stream(); + bool collect_countables_stream(); + /** * Split out countable mutable state from the class so that copy- * constructor does something sane when lambda derive function @@ -88,6 +108,9 @@ private: uint64_t last_value, value; const struct fd_perfcntr_countable *countable; const struct fd_perfcntr_counter *counter; + + /* index into perfcntr stream sample buf: */ + unsigned idx; }; std::vector state; @@ -115,6 +138,11 @@ private: void collect() const; void resolve() const; + /* perfcntr stream related APIs */ + void configure_stream(struct drm_msm_perfcntr_config *req) const; + void resolve_sample_idx(const struct drm_msm_perfcntr_config *req) const; + void collect_stream(const uint64_t *buf) const; + private: uint64_t get_value() const;