diff --git a/src/panfrost/ds/pan_pps_perf.cpp b/src/panfrost/ds/pan_pps_perf.cpp index 41e89bd2e27..fce1792f55f 100644 --- a/src/panfrost/ds/pan_pps_perf.cpp +++ b/src/panfrost/ds/pan_pps_perf.cpp @@ -46,6 +46,7 @@ PanfrostPerf::~PanfrostPerf() { if (perf) { pan_perf_disable(perf); + pan_perf_finish(perf); ralloc_free(perf); } } diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c index 604e0f75a26..bd579c39137 100644 --- a/src/panfrost/perf/pan_perf.c +++ b/src/panfrost/perf/pan_perf.c @@ -1,5 +1,6 @@ /* * Copyright © 2021 Collabora, Ltd. + * Copyright © 2026 Arm Ltd. * SPDX-License-Identifier: MIT */ @@ -8,7 +9,6 @@ #include #include "util/macros.h" -#include "util/ralloc.h" #include "pan_perf.h" @@ -17,23 +17,38 @@ #include #include -#define PAN_COUNTERS_PER_CATEGORY 64 #define PAN_SHADER_CORE_INDEX 3 -uint32_t +int64_t +pan_perf_counter_read_raw(const struct pan_perf *perf, uint8_t category_index, + uint8_t block_index, uint32_t counter_index) +{ + assert(perf->session->data != NULL); + + const uint32_t val_offset = + perf->mem_layout.category[category_index].offset + + perf->mem_layout.block_stride * block_index + + perf->mem_layout.counter_stride * counter_index; + + uint8_t *val_ptr = ((uint8_t *)perf->session->data) + val_offset; + return pan_kmod_perf_load_counter(perf->session, val_ptr); +} + +int64_t pan_perf_counter_read(const struct pan_perf_counter *counter, const struct pan_perf *perf) { - unsigned offset = perf->category_offset[counter->category_index]; - offset += counter->offset; - assert(offset < perf->n_counter_values); + int64_t ret = pan_perf_counter_read_raw(perf, counter->category_index, 0, + counter->offset); - uint32_t ret = perf->counter_values[offset]; - - // If counter belongs to shader core, accumulate values for all other cores + /* If counter belongs to shader core, sum values for all cores. 
*/ if (counter->category_index == PAN_SHADER_CORE_INDEX) { - for (uint32_t core = 1; core < perf->core_id_range; ++core) { - ret += perf->counter_values[offset + PAN_COUNTERS_PER_CATEGORY * core]; + uint32_t n_cores = + perf->mem_layout.category[PAN_SHADER_CORE_INDEX].n_blocks; + for (uint32_t core = 1; core < n_cores; ++core) { + ret += pan_perf_counter_read_raw(perf, PAN_SHADER_CORE_INDEX, core, + counter->offset); + assert(ret >= 0 && "counter sum should not overflow"); } } @@ -64,6 +79,9 @@ pan_perf_init(struct pan_perf *perf, int fd) perf->dev = pan_kmod_dev_create(fd, 0, NULL); assert(perf->dev); + perf->session = pan_kmod_perf_create(perf->dev); + assert(perf->session); + struct pan_kmod_dev_props props = perf->dev->props; const struct pan_model *model = @@ -76,49 +94,29 @@ pan_perf_init(struct pan_perf *perf, int fd) if (perf->cfg == NULL) UNREACHABLE("Performance counters missing!"); - // Generally counter blocks are laid out in the following order: - // Job manager, tiler, one or more L2 caches, and one or more shader cores. 
- unsigned l2_slices = pan_query_l2_slices(&props); - pan_query_core_count(&props, &perf->core_id_range); - - uint32_t n_blocks = 2 + l2_slices + perf->core_id_range; - perf->n_counter_values = PAN_COUNTERS_PER_CATEGORY * n_blocks; - perf->counter_values = ralloc_array(perf, uint32_t, perf->n_counter_values); - - /* Setup the layout */ - perf->category_offset[0] = PAN_COUNTERS_PER_CATEGORY * 0; - perf->category_offset[1] = PAN_COUNTERS_PER_CATEGORY * 1; - perf->category_offset[2] = PAN_COUNTERS_PER_CATEGORY * 2; - perf->category_offset[3] = PAN_COUNTERS_PER_CATEGORY * (2 + l2_slices); -} - -static int -pan_perf_query(struct pan_perf *perf, uint32_t enable) -{ - struct drm_panfrost_perfcnt_enable perfcnt_enable = {enable, 0}; - return pan_kmod_ioctl(perf->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_ENABLE, - &perfcnt_enable); + pan_kmod_perf_query_layout(perf->session, &perf->mem_layout); } int pan_perf_enable(struct pan_perf *perf) { - return pan_perf_query(perf, 1 /* enable */); + return pan_kmod_perf_enable(perf->session); } int pan_perf_disable(struct pan_perf *perf) { - return pan_perf_query(perf, 0 /* disable */); + return pan_kmod_perf_disable(perf->session); +} + +void +pan_perf_finish(struct pan_perf *perf) +{ + pan_kmod_perf_destroy(perf->session); } int pan_perf_dump(struct pan_perf *perf) { - // Dump performance counter values to the memory buffer pointed to by - // counter_values - struct drm_panfrost_perfcnt_dump perfcnt_dump = { - (uint64_t)(uintptr_t)perf->counter_values}; - return pan_kmod_ioctl(perf->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_DUMP, - &perfcnt_dump); + return pan_kmod_perf_dump(perf->session); } diff --git a/src/panfrost/perf/pan_perf.h b/src/panfrost/perf/pan_perf.h index f60012b7d28..c3018af64f1 100644 --- a/src/panfrost/perf/pan_perf.h +++ b/src/panfrost/perf/pan_perf.h @@ -8,6 +8,8 @@ #include +#include + #if defined(__cplusplus) extern "C" { #endif @@ -15,12 +17,6 @@ extern "C" { #define PAN_PERF_MAX_CATEGORIES 4 #define PAN_PERF_MAX_COUNTERS 
64 -struct pan_kmod_dev; -struct pan_kmod_dev_props; -struct pan_model; -struct pan_perf_category; -struct pan_perf; - enum pan_perf_counter_units { PAN_PERF_COUNTER_UNITS_CYCLES, PAN_PERF_COUNTER_UNITS_JOBS, @@ -68,19 +64,17 @@ struct pan_perf_config { struct pan_perf { struct pan_kmod_dev *dev; - unsigned core_id_range; + struct pan_kmod_perf_session *session; const struct pan_perf_config *cfg; - - // Memory where to dump counter values - uint32_t *counter_values; - uint32_t n_counter_values; - - /* Offsets of categories */ - unsigned category_offset[PAN_PERF_MAX_CATEGORIES]; + struct pan_kmod_perf_buffer_layout mem_layout; }; -uint32_t pan_perf_counter_read(const struct pan_perf_counter *counter, - const struct pan_perf *perf); +int64_t pan_perf_counter_read_raw(const struct pan_perf *perf, + uint8_t category_index, uint8_t block_index, + uint32_t counter_index); + +int64_t pan_perf_counter_read(const struct pan_perf_counter *counter, + const struct pan_perf *perf); void pan_perf_init(struct pan_perf *perf, int fd); @@ -88,6 +82,8 @@ int pan_perf_enable(struct pan_perf *perf); int pan_perf_disable(struct pan_perf *perf); +void pan_perf_finish(struct pan_perf *perf); + int pan_perf_dump(struct pan_perf *perf); #if defined(__cplusplus) diff --git a/src/panfrost/perf/quick.c b/src/panfrost/perf/quick.c index 1d72ca4928f..afe114fe198 100644 --- a/src/panfrost/perf/quick.c +++ b/src/panfrost/perf/quick.c @@ -45,8 +45,9 @@ main(void) for (unsigned j = 0; j < cat->n_counters; ++j) { const struct pan_perf_counter *ctr = &cat->counters[j]; - uint32_t val = pan_perf_counter_read(ctr, perf); - printf("%s (%s): %u\n", ctr->name, ctr->symbol_name, val); + int64_t val = pan_perf_counter_read(ctr, perf); + /* int64_t is not always 'long'; cast for a portable format. */ + printf("%s (%s): %lld\n", ctr->name, ctr->symbol_name, (long long)val); } printf("\n");