diff --git a/docs/perfetto.rst b/docs/perfetto.rst index b65ffb99951..04410e69afe 100644 --- a/docs/perfetto.rst +++ b/docs/perfetto.rst @@ -220,6 +220,16 @@ To run the producer, follow these two simple steps: ./build/pps-producer +Panthor +^^^^^^^ + +The Panthor PPS driver uses stable IOCTLs and doesn't need any special privileges to enable. The +data source can be configured by running the producer: + +.. code-block:: sh + + ./build/pps-producer + V3D / V3DV ^^^^^^^^^^ diff --git a/include/drm-uapi/panthor_drm.h b/include/drm-uapi/panthor_drm.h index e238c6264fa..174f39dc2da 100644 --- a/include/drm-uapi/panthor_drm.h +++ b/include/drm-uapi/panthor_drm.h @@ -154,6 +154,9 @@ enum drm_panthor_ioctl_id { * This is useful for imported BOs. */ DRM_PANTHOR_BO_QUERY_INFO, + + /** @DRM_PANTHOR_PERF_CONTROL: Control a performance counter session. */ + DRM_PANTHOR_PERF_CONTROL, }; /** @@ -253,6 +256,9 @@ enum drm_panthor_dev_query_type { * @DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO: Query allowed group priorities information. */ DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO, + + /** @DRM_PANTHOR_DEV_QUERY_PERF_INFO: Query performance counter interface information. */ + DRM_PANTHOR_DEV_QUERY_PERF_INFO, }; /** @@ -445,6 +451,138 @@ struct drm_panthor_group_priorities_info { __u8 pad[3]; }; +/** + * enum drm_panthor_perf_feat_flags - Performance counter configuration feature flags. + */ +enum drm_panthor_perf_feat_flags { + /** @DRM_PANTHOR_PERF_BLOCK_STATES_SUPPORT: Coarse-grained block states are supported. */ + DRM_PANTHOR_PERF_BLOCK_STATES_SUPPORT = 1 << 0, +}; + +/** + * enum drm_panthor_perf_block_type - Performance counter supported block types. + */ +enum drm_panthor_perf_block_type { + /** @DRM_PANTHOR_PERF_BLOCK_METADATA: Internal use only. */ + DRM_PANTHOR_PERF_BLOCK_METADATA = 0, + + /** @DRM_PANTHOR_PERF_BLOCK_FW: The FW counter block. */ + DRM_PANTHOR_PERF_BLOCK_FW, + + /** @DRM_PANTHOR_PERF_BLOCK_CSHW: The CSHW counter block. 
*/ + DRM_PANTHOR_PERF_BLOCK_CSHW, + + /** @DRM_PANTHOR_PERF_BLOCK_TILER: The tiler counter block. */ + DRM_PANTHOR_PERF_BLOCK_TILER, + + /** @DRM_PANTHOR_PERF_BLOCK_MEMSYS: A memsys counter block. */ + DRM_PANTHOR_PERF_BLOCK_MEMSYS, + + /** @DRM_PANTHOR_PERF_BLOCK_SHADER: A shader core counter block. */ + DRM_PANTHOR_PERF_BLOCK_SHADER, + + /** @DRM_PANTHOR_PERF_BLOCK_FIRST: Internal use only. */ + DRM_PANTHOR_PERF_BLOCK_FIRST = DRM_PANTHOR_PERF_BLOCK_FW, + + /** @DRM_PANTHOR_PERF_BLOCK_LAST: Internal use only. */ + DRM_PANTHOR_PERF_BLOCK_LAST = DRM_PANTHOR_PERF_BLOCK_SHADER, + + /** @DRM_PANTHOR_PERF_BLOCK_MAX: Internal use only. */ + DRM_PANTHOR_PERF_BLOCK_MAX = DRM_PANTHOR_PERF_BLOCK_LAST + 1, +}; + +/** + * enum drm_panthor_perf_clock - Identifier of the clock used to produce the cycle count values + * in a given block. + * + * Since the integrator has the choice of using one or more clocks, there may be some confusion + * as to which blocks are counted by which clock values unless this information is explicitly + * provided as part of every block sample. Not every single clock here can be used: in the simplest + * case, all cycle counts will be associated with the top-level clock. + */ +enum drm_panthor_perf_clock { + /** @DRM_PANTHOR_PERF_CLOCK_TOPLEVEL: Top-level CSF clock. */ + DRM_PANTHOR_PERF_CLOCK_TOPLEVEL, + + /** + * @DRM_PANTHOR_PERF_CLOCK_COREGROUP: Core group clock, responsible for the MMU, L2 + * caches and the tiler. + */ + DRM_PANTHOR_PERF_CLOCK_COREGROUP, + + /** @DRM_PANTHOR_PERF_CLOCK_SHADER: Clock for the shader cores. */ + DRM_PANTHOR_PERF_CLOCK_SHADER, +}; + +/** + * struct drm_panthor_perf_info - Performance counter interface information + * + * Structure grouping all queryable information relating to the performance counter + * interfaces. + */ +struct drm_panthor_perf_info { + /** + * @counters_per_block: The number of 8-byte counters available in a block. 
+ */ + __u32 counters_per_block; + + /** + * @sample_header_size: The size of the header struct available at the beginning + * of every sample. + */ + __u32 sample_header_size; + + /** + * @block_header_size: The size of the header struct inline with the counters for a + * single block. + */ + __u32 block_header_size; + + /** + * @sample_size: The size of a fully annotated sample, starting with a sample header + * of size @sample_header_size bytes, and all available blocks for the current + * configuration, each comprised of @counters_per_block 64-bit counters and + * a block header of @block_header_size bytes. + * + * The user must use this field to allocate size for the ring buffer. In + * the case of new blocks being added, an old userspace can always use + * this field and ignore any blocks it does not know about. + */ + __u32 sample_size; + + /** @flags: Combination of drm_panthor_perf_feat_flags flags. */ + __u32 flags; + + /** + * @supported_clocks: Bitmask of the clocks supported by the GPU. + * + * Each bit represents a variant of the enum drm_panthor_perf_clock. + * + * For the same GPU, different implementers may have different clocks for the same hardware + * block. At the moment, up to three clocks are supported, and any clocks that are present + * will be reported here. + */ + __u32 supported_clocks; + + /** @fw_blocks: Number of FW blocks available. */ + __u32 fw_blocks; + + /** @cshw_blocks: Number of CSHW blocks available. */ + __u32 cshw_blocks; + + /** @tiler_blocks: Number of tiler blocks available. */ + __u32 tiler_blocks; + + /** @memsys_blocks: Number of memsys blocks available. */ + __u32 memsys_blocks; + + /** @shader_blocks: Number of shader core blocks available. */ + __u32 shader_blocks; + + /** @pad: MBZ. 
*/ + __u32 pad; +}; + /** * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY */ @@ -1187,6 +1325,434 @@ struct drm_panthor_bo_query_info { __u32 pad; }; +/** + * DOC: Performance counter decoding in userspace. + * + * Each sample will be exposed to userspace in the following manner: + * + * +--------+--------+------------------------+--------+-------------------------+-----+ + * | Sample | Block | Block | Block | Block | ... | + * | header | header | counters | header | counters | | + * +--------+--------+------------------------+--------+-------------------------+-----+ + * + * Each sample will start with a sample header of type @struct drm_panthor_perf_sample_header, + * providing sample-wide information like the start and end timestamps, the counter set currently + * configured, and any errors that may have occurred during sampling. + * + * After the fixed size header, the sample will consist of blocks of + * 64-bit @drm_panthor_perf_info::counters_per_block counters, each prefaced with a + * header of its own, indicating the source block type and a clock source identifier. The cycle + * counts needed to normalize cycle values within a block are stored in the sample header. + */ + +/** + * enum drm_panthor_perf_block_state - Bitmask of the power and execution states that an individual + * hardware block went through in a sampling period. + * + * Because the sampling period is controlled from userspace, the block may undergo multiple + * state transitions, so this must be interpreted as one or more such transitions occurring. + */ +enum drm_panthor_perf_block_state { + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_UNKNOWN: The state of this block was unknown during + * the sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_UNKNOWN = 0, + + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_ON: This block was powered on for some or all of + * the sampling period. 
+ */ + DRM_PANTHOR_PERF_BLOCK_STATE_ON = 1 << 0, + + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_OFF: This block was powered off for some or all of the + * sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_OFF = 1 << 1, + + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_AVAILABLE: This block was available for execution for + * some or all of the sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_AVAILABLE = 1 << 2, + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_UNAVAILABLE: This block was unavailable for execution for + * some or all of the sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_UNAVAILABLE = 1 << 3, + + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_NORMAL: This block was executing in normal mode + * for some or all of the sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_NORMAL = 1 << 4, + + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_PROTECTED: This block was executing in protected mode + * for some or all of the sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_PROTECTED = 1 << 5, +}; + +/** + * struct drm_panthor_perf_block_header - Header present before every block in the + * sample ringbuffer. + */ +struct drm_panthor_perf_block_header { + /** @block_type: Type of the block. */ + __u8 block_type; + + /** @block_idx: Block index. */ + __u8 block_idx; + + /** + * @block_states: Coarse-grained block transitions, bitmask of enum + * drm_panthor_perf_block_state. + */ + __u8 block_states; + + /** + * @clock: Clock used to produce the cycle count for this block, taken from + * enum drm_panthor_perf_clock. The cycle counts are stored in the sample header. + */ + __u8 clock; + + /** @pad: MBZ. */ + __u8 pad[4]; + + /** @enable_mask: Bitmask of counters requested during the session setup. */ + __u64 enable_mask[2]; +}; + +/** + * enum drm_panthor_perf_sample_flags - Sample-wide events that occurred over the sampling + * period. 
+ */ +enum drm_panthor_perf_sample_flags { + /** + * @DRM_PANTHOR_PERF_SAMPLE_OVERFLOW: This sample contains overflows due to the duration + * of the sampling period. + */ + DRM_PANTHOR_PERF_SAMPLE_OVERFLOW = 1 << 0, + + /** + * @DRM_PANTHOR_PERF_SAMPLE_ERROR: This sample encountered an error condition during + * the sample duration. + */ + DRM_PANTHOR_PERF_SAMPLE_ERROR = 1 << 1, +}; + +/** + * struct drm_panthor_perf_sample_header - Header present before every sample. + */ +struct drm_panthor_perf_sample_header { + /** + * @timestamp_start_ns: Earliest timestamp that values in this sample represent, in + * nanoseconds. Derived from CLOCK_MONOTONIC_RAW. + */ + __u64 timestamp_start_ns; + + /** + * @timestamp_end_ns: Latest timestamp that values in this sample represent, in + * nanoseconds. Derived from CLOCK_MONOTONIC_RAW. + */ + __u64 timestamp_end_ns; + + /** @block_set: Set of performance counter blocks. */ + __u8 block_set; + + /** @pad: MBZ. */ + __u8 pad[3]; + + /** @flags: Current sample flags, combination of drm_panthor_perf_sample_flags. */ + __u32 flags; + + /** + * @user_data: User data provided as part of the command that triggered this sample. + * + * - Automatic samples (periodic ones or those around non-counting periods or power state + * transitions) will be tagged with the user_data provided as part of the + * DRM_PANTHOR_PERF_COMMAND_START call. + * - Manual samples will be tagged with the user_data provided with the + * DRM_PANTHOR_PERF_COMMAND_SAMPLE call. + * - A session's final automatic sample will be tagged with the user_data provided with the + * DRM_PANTHOR_PERF_COMMAND_STOP call. + */ + __u64 user_data; + + /** + * @toplevel_clock_cycles: The number of cycles elapsed between + * drm_panthor_perf_sample_header::timestamp_start_ns and + * drm_panthor_perf_sample_header::timestamp_end_ns on the top-level clock if the + * corresponding bit is set in drm_panthor_perf_info::supported_clocks. 
+ */ + __u64 toplevel_clock_cycles; + + /** + * @coregroup_clock_cycles: The number of cycles elapsed between + * drm_panthor_perf_sample_header::timestamp_start_ns and + * drm_panthor_perf_sample_header::timestamp_end_ns on the coregroup clock if the + * corresponding bit is set in drm_panthor_perf_info::supported_clocks. + */ + __u64 coregroup_clock_cycles; + + /** + * @shader_clock_cycles: The number of cycles elapsed between + * drm_panthor_perf_sample_header::timestamp_start_ns and + * drm_panthor_perf_sample_header::timestamp_end_ns on the shader core clock if the + * corresponding bit is set in drm_panthor_perf_info::supported_clocks. + */ + __u64 shader_clock_cycles; +}; + +/** + * enum drm_panthor_perf_command - Command type passed to the DRM_PANTHOR_PERF_CONTROL + * IOCTL. + */ +enum drm_panthor_perf_command { + /** @DRM_PANTHOR_PERF_COMMAND_SETUP: Create a new performance counter sampling context. */ + DRM_PANTHOR_PERF_COMMAND_SETUP, + + /** @DRM_PANTHOR_PERF_COMMAND_TEARDOWN: Teardown a performance counter sampling context. */ + DRM_PANTHOR_PERF_COMMAND_TEARDOWN, + + /** @DRM_PANTHOR_PERF_COMMAND_START: Start a sampling session on the indicated context. */ + DRM_PANTHOR_PERF_COMMAND_START, + + /** @DRM_PANTHOR_PERF_COMMAND_STOP: Stop the sampling session on the indicated context. */ + DRM_PANTHOR_PERF_COMMAND_STOP, + + /** + * @DRM_PANTHOR_PERF_COMMAND_SAMPLE: Request a manual sample on the indicated context. + * + * When the sampling session is configured with a non-zero sampling frequency, any + * DRM_PANTHOR_PERF_CONTROL calls with this command will be ignored and return an + * -EINVAL. + */ + DRM_PANTHOR_PERF_COMMAND_SAMPLE, +}; + +/** + * struct drm_panthor_perf_control - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL. + */ +struct drm_panthor_perf_control { + /** @cmd: Command from enum drm_panthor_perf_command. */ + __u32 cmd; + + /** + * @handle: session handle. + * + * Returned by the DRM_PANTHOR_PERF_COMMAND_SETUP call. 
+ * It must be used in subsequent commands for the same context. + */ + __u32 handle; + + /** + * @size: size of the command structure. + * + * If the pointer is NULL, the size is updated by the driver to provide the size of the + * output structure. If the pointer is not NULL, the driver will only copy min(size, + * struct_size) to the pointer and update the size accordingly. + */ + __u64 size; + + /** + * @pointer: user pointer to a command type struct, such as + * @struct drm_panthor_perf_cmd_start. + */ + __u64 pointer; +}; + +/** + * enum drm_panthor_perf_counter_set - The counter set to be requested from the hardware. + * + * The hardware supports a single performance counter set at a time, so requesting any set other + * than the primary may fail if another process is sampling at the same time. + * + * If in doubt, the primary counter set has the most commonly used counters and requires no + * additional permissions to open. + */ +enum drm_panthor_perf_counter_set { + /** + * @DRM_PANTHOR_PERF_SET_PRIMARY: The default set configured on the hardware. + * + * This is the only set for which all counters in all blocks are defined. + */ + DRM_PANTHOR_PERF_SET_PRIMARY, + + /** + * @DRM_PANTHOR_PERF_SET_SECONDARY: The secondary performance counter set. + * + * Some blocks may not have any defined counters for this set, and the block will + * have the UNAVAILABLE block state permanently set in the block header. + * + * Accessing this set requires the calling process to have the CAP_PERFMON capability. + */ + DRM_PANTHOR_PERF_SET_SECONDARY, + + /** + * @DRM_PANTHOR_PERF_SET_TERTIARY: The tertiary performance counter set. + * + * Some blocks may not have any defined counters for this set, and the block will have + * the UNAVAILABLE block state permanently set in the block header. Note that the + * tertiary set has the fewest defined counter blocks. + * + * Accessing this set requires the calling process to have the CAP_PERFMON capability. 
+ */ + DRM_PANTHOR_PERF_SET_TERTIARY, +}; + +/** + * struct drm_panthor_perf_ringbuf_control - Struct used to map in the ring buffer control indices + * into memory shared between user and kernel. + * + */ +struct drm_panthor_perf_ringbuf_control { + /** + * @extract_idx: The index of the latest sample that was processed by userspace. Only + * modifiable by userspace. + */ + __u64 extract_idx; + + /** + * @insert_idx: The index of the latest sample emitted by the kernel. Only modifiable by + * the kernel. + */ + __u64 insert_idx; +}; + +/** + * struct drm_panthor_perf_cmd_setup - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL + * when the DRM_PANTHOR_PERF_COMMAND_SETUP command is specified. + */ +struct drm_panthor_perf_cmd_setup { + /** + * @block_set: Set of performance counter blocks, member of + * enum drm_panthor_perf_counter_set. + * + * This is a global configuration and only one set can be active at a time. If + * another client has already requested a counter set, any further requests + * for a different counter set will fail and return an -EBUSY. + * + * If the requested set does not exist, the request will fail and return an -EINVAL. + * + * Some sets have additional requirements to be enabled, and the setup request will + * fail with an -EACCES if these requirements are not satisfied. + */ + __u8 block_set; + + /** @pad: MBZ. */ + __u8 pad[7]; + + /** @fd: eventfd for signalling the availability of a new sample. */ + __u32 fd; + + /** @ringbuf_handle: Handle to the BO to write perf counter samples to. */ + __u32 ringbuf_handle; + + /** + * @control_handle: Handle to the BO containing a contiguous 16 byte range, used for the + * insert and extract indices for the ringbuffer. + */ + __u32 control_handle; + + /** + * @sample_slots: The number of slots available in the userspace-provided BO. Must be + * a power of 2. + * + * If sample_slots * sample_size does not match the BO size, the setup request will fail. 
+ */ + __u32 sample_slots; + + /** + * @control_offset: Offset into the control BO where the insert and extract indices are + * located. + */ + __u64 control_offset; + + /** + * @sample_freq_ns: Period between automatic counter sample collection in nanoseconds. Zero + * disables automatic collection and all collection must be done through explicit calls + * to DRM_PANTHOR_PERF_CONTROL.SAMPLE. Non-zero values will disable manual counter sampling + * via the DRM_PANTHOR_PERF_COMMAND_SAMPLE command. + * + * Zero only disables software-triggered periodic sampling: hardware will still trigger + * automatic samples on certain events, including shader core power transitions, and + * entries to and exits from non-counting periods. The final stop command will also + * trigger a sample to ensure no data is lost. + */ + __u64 sample_freq_ns; + + /** + * @fw_enable_mask: Bitmask of counters to request from the FW counter block. Any bits + * past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit 0 + * corresponds to counter 0. + */ + __u64 fw_enable_mask[2]; + + /** + * @cshw_enable_mask: Bitmask of counters to request from the CSHW counter block. Any bits + * past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit 0 + * corresponds to counter 0. + */ + __u64 cshw_enable_mask[2]; + + /** + * @tiler_enable_mask: Bitmask of counters to request from the tiler counter block. Any + * bits past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit + * 0 corresponds to counter 0. + */ + __u64 tiler_enable_mask[2]; + + /** + * @memsys_enable_mask: Bitmask of counters to request from the memsys counter blocks. Any + * bits past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit 0 + * corresponds to counter 0. + */ + __u64 memsys_enable_mask[2]; + + /** + * @shader_enable_mask: Bitmask of counters to request from the shader core counter blocks. 
+ * Any bits past the first drm_panthor_perf_info.counters_per_block bits will be ignored. + * Bit 0 corresponds to counter 0. + */ + __u64 shader_enable_mask[2]; +}; + +/** + * struct drm_panthor_perf_cmd_start - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL + * when the DRM_PANTHOR_PERF_COMMAND_START command is specified. + */ +struct drm_panthor_perf_cmd_start { + /** + * @user_data: User provided data that will be attached to automatic samples collected + * until the next DRM_PANTHOR_PERF_COMMAND_STOP. + */ + __u64 user_data; +}; + +/** + * struct drm_panthor_perf_cmd_stop - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL + * when the DRM_PANTHOR_PERF_COMMAND_STOP command is specified. + */ +struct drm_panthor_perf_cmd_stop { + /** + * @user_data: User provided data that will be attached to the automatic sample collected + * at the end of this sampling session. + */ + __u64 user_data; +}; + +/** + * struct drm_panthor_perf_cmd_sample - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL + * when the DRM_PANTHOR_PERF_COMMAND_SAMPLE command is specified. + */ +struct drm_panthor_perf_cmd_sample { + /** @user_data: User provided data that will be attached to the sample. */ + __u64 user_data; +}; + /** * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number * @__access: Access type. Must be R, W or RW. 
@@ -1237,6 +1803,8 @@ enum { DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync), DRM_IOCTL_PANTHOR_BO_QUERY_INFO = DRM_IOCTL_PANTHOR(WR, BO_QUERY_INFO, bo_query_info), + DRM_IOCTL_PANTHOR_PERF_CONTROL = + DRM_IOCTL_PANTHOR(WR, PERF_CONTROL, perf_control) }; #if defined(__cplusplus) diff --git a/src/panfrost/ds/meson.build b/src/panfrost/ds/meson.build index b61b02ee248..8f8f5ccae5d 100644 --- a/src/panfrost/ds/meson.build +++ b/src/panfrost/ds/meson.build @@ -5,6 +5,7 @@ pps_panfrost_sources = [ 'pan_pps_perf.cpp', + 'pan_pps_dev.cpp', 'pan_pps_driver.cpp' ] diff --git a/src/panfrost/ds/pan_pps_dev.cpp b/src/panfrost/ds/pan_pps_dev.cpp new file mode 100644 index 00000000000..065a9c46b7d --- /dev/null +++ b/src/panfrost/ds/pan_pps_dev.cpp @@ -0,0 +1,36 @@ +/* + * Copyright © 2021 Collabora, Ltd. + * SPDX-License-Identifier: MIT + */ + +#include "pan_pps_perf.h" + +#include +#include + +#include +#include + +namespace pps { +PanfrostDevice::PanfrostDevice(int fd): fd(fd) +{ + assert(fd >= 0); +} + +PanfrostDevice::~PanfrostDevice() +{ +} + +PanfrostDevice::PanfrostDevice(PanfrostDevice &&o): fd{o.fd} +{ + o.fd = -1; +} + +PanfrostDevice & +PanfrostDevice::operator=(PanfrostDevice &&o) +{ + std::swap(fd, o.fd); + return *this; +} + +} // namespace pps diff --git a/src/panfrost/ds/pan_pps_driver.cpp b/src/panfrost/ds/pan_pps_driver.cpp index f5982a0cba6..aa752777611 100644 --- a/src/panfrost/ds/pan_pps_driver.cpp +++ b/src/panfrost/ds/pan_pps_driver.cpp @@ -33,46 +33,13 @@ PanfrostDriver::~PanfrostDriver() uint64_t PanfrostDriver::get_min_sampling_period_ns() { - return 1000000; + return perf->get_min_sampling_period_ns(); } std::pair, std::vector> PanfrostDriver::create_available_counters(const PanfrostPerf &perf) { - std::pair, std::vector> ret; - auto &[groups, counters] = ret; - - size_t cid = 0; - - for (uint32_t gid = 0; gid < perf.perf->cfg->n_categories; ++gid) { - const auto &category = perf.perf->cfg->categories[gid]; - CounterGroup group = {}; - group.id = gid; - 
group.name = category.name; - - for (size_t id = 0; id < category.n_counters; ++id) { - Counter counter = {}; - counter.id = cid; - counter.group = gid; - - counter.name = category.counters[id].name; - - counter.set_getter([=](const Counter &c, const Driver &d) { - auto &pan_driver = PanfrostDriver::into(d); - struct pan_perf *perf = pan_driver.perf->perf; - const auto counter = &perf->cfg->categories[gid].counters[id]; - return int64_t(pan_perf_counter_read(counter, perf)); - }); - - group.counters.push_back(cid++); - - counters.emplace_back(counter); - } - - groups.push_back(group); - } - - return ret; + return perf.create_available_counters(); } bool @@ -81,9 +48,12 @@ PanfrostDriver::init_perfcnt() if (!dev) { dev = std::make_unique(drm_device.fd); } + if (!perf) { perf = std::make_unique(*dev); } + + perf->init_perfcnt(drm_device.fd); if (groups.empty() && counters.empty()) { std::tie(groups, counters) = create_available_counters(*perf); } @@ -106,9 +76,9 @@ PanfrostDriver::enable_all_counters() } void -PanfrostDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */) +PanfrostDriver::enable_perfcnt(const uint64_t sampling_period_ns) { - auto res = perf->enable(); + auto res = perf->enable_perfcnt(sampling_period_ns); if (!check(res, "Failed to enable performance counters")) { if (res == -ENOSYS) { PERFETTO_FATAL( @@ -121,10 +91,8 @@ PanfrostDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */) bool PanfrostDriver::dump_perfcnt() { - last_dump_ts = perfetto::base::GetBootTimeNs().count(); - // Dump performance counters to buffer - if (!check(perf->dump(), "Failed to dump performance counters")) { + if (!check(perf->dump_perfcnt(), "Failed to dump performance counters")) { PERFETTO_ELOG("Skipping sample"); return false; } @@ -135,15 +103,13 @@ PanfrostDriver::dump_perfcnt() uint64_t PanfrostDriver::next() { - auto ret = last_dump_ts; - last_dump_ts = 0; - return ret; + return perf->next(); } void PanfrostDriver::disable_perfcnt() { - 
perf->disable(); + perf->disable_perfcnt(); perf.reset(); dev.reset(); groups.clear(); @@ -154,20 +120,19 @@ PanfrostDriver::disable_perfcnt() uint32_t PanfrostDriver::gpu_clock_id() const { - return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME; + return perf->gpu_clock_id(); } uint64_t PanfrostDriver::gpu_timestamp() const { - return perfetto::base::GetBootTimeNs().count(); + return perf->gpu_timestamp(); } bool -PanfrostDriver::cpu_gpu_timestamp(uint64_t &, uint64_t &) const +PanfrostDriver::cpu_gpu_timestamp(uint64_t &cpu_timestamp, uint64_t &gpu_timestamp) const { - /* Not supported */ - return false; + return perf->cpu_gpu_timestamp(cpu_timestamp, gpu_timestamp); } } // namespace pps diff --git a/src/panfrost/ds/pan_pps_perf.cpp b/src/panfrost/ds/pan_pps_perf.cpp index 41e89bd2e27..bbf4e64033d 100644 --- a/src/panfrost/ds/pan_pps_perf.cpp +++ b/src/panfrost/ds/pan_pps_perf.cpp @@ -1,9 +1,11 @@ /* * Copyright © 2021 Collabora, Ltd. + * Copyright © 2026 Arm, Ltd. * SPDX-License-Identifier: MIT */ #include "pan_pps_perf.h" +#include "pan_pps_driver.h" #include #include @@ -12,45 +14,24 @@ #include namespace pps { -PanfrostDevice::PanfrostDevice(int fd): fd(fd) -{ - assert(fd >= 0); -} - -PanfrostDevice::~PanfrostDevice() -{ -} - -PanfrostDevice::PanfrostDevice(PanfrostDevice &&o): fd{o.fd} -{ - o.fd = -1; -} - -PanfrostDevice & -PanfrostDevice::operator=(PanfrostDevice &&o) -{ - std::swap(fd, o.fd); - return *this; -} - PanfrostPerf::PanfrostPerf(const PanfrostDevice &dev) - : perf{reinterpret_cast( - rzalloc(nullptr, struct pan_perf))} { + perf = reinterpret_cast( + rzalloc(nullptr, struct pan_perf)); assert(perf); assert(dev.fd >= 0); - pan_perf_init(perf, dev.fd); } PanfrostPerf::~PanfrostPerf() { if (perf) { pan_perf_disable(perf); + pan_perf_finish(perf); ralloc_free(perf); } } -PanfrostPerf::PanfrostPerf(PanfrostPerf &&o): perf{o.perf} +PanfrostPerf::PanfrostPerf(PanfrostPerf &&o): perf(o.perf) { o.perf = nullptr; } @@ -62,25 +43,181 @@ 
PanfrostPerf::operator=(PanfrostPerf &&o) return *this; } +bool +PanfrostPerf::init_perfcnt(int fd) +{ + pan_perf_init(perf, fd); + + return perf != NULL; +} + int -PanfrostPerf::enable() const +PanfrostPerf::enable_perfcnt(uint64_t /* sampling_period_ns */) { assert(perf); return pan_perf_enable(perf); } void -PanfrostPerf::disable() const +PanfrostPerf::disable_perfcnt() { assert(perf); pan_perf_disable(perf); } -int -PanfrostPerf::dump() const +bool +PanfrostPerf::dump_perfcnt() { assert(perf); - return pan_perf_dump(perf); + + int ret = pan_perf_dump(perf); + + if (pan_perf_timestamp_supported(perf)) + last_dump_ts = pan_perf_get_timestamp(perf); + else + last_dump_ts = perfetto::base::GetBootTimeNs().count(); + + return !!(ret >= 0); +} + +uint64_t +PanfrostPerf::get_min_sampling_period_ns() +{ + assert(perf); + return pan_perf_get_min_sampling_period(perf); +} + +void * +PanfrostPerf::get_subinstance() { + return perf; +} + +std::string +format_suffix(const char *fmt, uint8_t idx) +{ + assert(strlen(fmt) < 200 && "fmt unreasonably long"); + char buf[256]; + std::snprintf(buf, sizeof(buf), fmt, idx); + + return std::string(buf); +} + +const char * +get_block_suffix(uint8_t category) +{ + assert(category <= PAN_PERF_COUNTER_CAT_MAX); + + switch (category) { + case PAN_PERF_COUNTER_CAT_MEMSYS: + return " (slice %u)"; + case PAN_PERF_COUNTER_CAT_SHADER: + return " (core %u)"; + default: + return nullptr; + } + + return nullptr; +} + +Counter::Units +convert_pan_units(enum pan_perf_counter_units unit) +{ + switch (unit) { + case PAN_PERF_COUNTER_UNITS_PRIMITIVES: + return Counter::Units::Primitive; + case PAN_PERF_COUNTER_UNITS_INSTRUCTIONS: + return Counter::Units::Instruction; + case PAN_PERF_COUNTER_UNITS_BYTES: + return Counter::Units::Byte; + case PAN_PERF_COUNTER_UNITS_PIXELS: + return Counter::Units::Pixel; + default: + return Counter::Units::None; + } +} + +std::pair, std::vector> +PanfrostPerf::create_available_counters() const +{ + std::pair, 
std::vector> ret; + auto &[groups, counters] = ret; + + uint32_t global_counter_id = 0; + + const struct pan_perf_category *category = NULL; + for (uint32_t cat_idx = 0; cat_idx < perf->cfg->n_categories; ++cat_idx) { + assert(cat_idx < PAN_PERF_COUNTER_CAT_MAX); + category = &perf->cfg->categories[cat_idx]; + + CounterGroup group = {}; + group.id = cat_idx; + group.name = category->name; + + uint32_t n_blocks = perf->mem_layout.category[cat_idx].n_blocks; + for (uint32_t counter_idx = 0; counter_idx < category->n_counters; + ++counter_idx) { + const struct pan_perf_counter *cinfo = + &category->counters[counter_idx]; + + for (uint32_t block_idx = 0; block_idx < n_blocks; ++block_idx) { + const char *suffix = get_block_suffix(cat_idx); + const std::string name = + cinfo->name + (suffix ? format_suffix(suffix, block_idx) : ""); + + Counter counter = {}; + counter.id = global_counter_id++; + counter.name = name; + counter.group = group.id; + counter.units = convert_pan_units(cinfo->units); + + counter.set_getter([=](const Counter &c, const Driver &d) { + auto &pan_driver = PanfrostDriver::into(d); + struct pan_perf *perf = static_cast( + pan_driver.perf->get_subinstance()); + return pan_perf_counter_read(perf, cinfo, block_idx); + }); + + group.counters.push_back(counter.id); + counters.emplace_back(counter); + } + } + + groups.push_back(group); + } + + return ret; +} + +uint64_t +PanfrostPerf::next() +{ + auto ret = last_dump_ts; + last_dump_ts = 0; + return ret; +} + +uint32_t +PanfrostPerf::gpu_clock_id() const +{ + assert(perf); + if (pan_perf_timestamp_supported(perf)) + return perfetto::protos::pbzero::BUILTIN_CLOCK_MONOTONIC_RAW; + else + return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME; +} + +uint64_t +PanfrostPerf::gpu_timestamp() const +{ + // TODO (panthor) This information is present in the dump + return perfetto::base::GetBootTimeNs().count(); +} + +bool +PanfrostPerf::cpu_gpu_timestamp(uint64_t &, uint64_t &) const +{ + // TODO (panthor) Start 
using the appropriate IOCTL to get these values + return false; } } // namespace pps diff --git a/src/panfrost/ds/pan_pps_perf.h b/src/panfrost/ds/pan_pps_perf.h index 742f1aa8f44..b07df750ccb 100644 --- a/src/panfrost/ds/pan_pps_perf.h +++ b/src/panfrost/ds/pan_pps_perf.h @@ -5,6 +5,12 @@ #pragma once +#include +#include +#include +#include +#include + struct pan_perf; namespace pps { @@ -27,17 +33,27 @@ class PanfrostPerf { PanfrostPerf(const PanfrostDevice &dev); ~PanfrostPerf(); - PanfrostPerf(const PanfrostPerf &) = delete; - PanfrostPerf &operator=(const PanfrostPerf &) = delete; + PanfrostPerf(PanfrostPerf &&o); + PanfrostPerf &operator=(PanfrostPerf &&o); - PanfrostPerf(PanfrostPerf &&); - PanfrostPerf &operator=(PanfrostPerf &&); + std::pair, std::vector> + create_available_counters() const; - int enable() const; - void disable() const; - int dump() const; + uint64_t get_min_sampling_period_ns(); + bool init_perfcnt(int fd); + int enable_perfcnt(uint64_t sampling_period_ns); + void disable_perfcnt(); + bool dump_perfcnt(); + uint64_t next(); + uint32_t gpu_clock_id() const; + uint64_t gpu_timestamp() const; + bool cpu_gpu_timestamp(uint64_t &cpu_timestamp, + uint64_t &gpu_timestamp) const; + void *get_subinstance(); + private: struct pan_perf *perf = nullptr; + uint64_t last_dump_ts = 0; }; } // namespace pps diff --git a/src/panfrost/lib/kmod/pan_kmod.h b/src/panfrost/lib/kmod/pan_kmod.h index e7356330e7d..922bfbbfb57 100644 --- a/src/panfrost/lib/kmod/pan_kmod.h +++ b/src/panfrost/lib/kmod/pan_kmod.h @@ -1,5 +1,6 @@ /* * Copyright © 2023 Collabora, Ltd. + * Copyright © 2026 Arm Ltd. * SPDX-License-Identifier: MIT */ @@ -37,6 +38,7 @@ #include "util/u_dynarray.h" #include "kmod/panthor_kmod.h" +#include "pan_props.h" #include "pan_trace.h" #if defined(__cplusplus) @@ -197,6 +199,9 @@ struct pan_kmod_dev_props { /* Texture feature bits. */ uint32_t texture_features[4]; + /* L2 feature bits. 
*/ + uint32_t l2_features; + /* Maximum number of threads per core. */ uint32_t max_threads_per_core; @@ -384,6 +389,54 @@ struct pan_kmod_va_range { uint64_t size; }; +struct pan_kmod_perf_session { + /* Device this perf session was created from. */ + struct pan_kmod_dev *dev; + + /* Sample data pointer. */ + void* data; + + /* If pan_kmod_perf_session::data_ts is supported. */ + bool data_ts_supported; + + /* The timestamp of the sample data. */ + uint64_t data_ts; +}; + +enum pan_kmod_perf_category { + PAN_KMOD_PERF_CAT_FRONTEND, + PAN_KMOD_PERF_CAT_TILER, + PAN_KMOD_PERF_CAT_MEMSYS, + PAN_KMOD_PERF_CAT_SHADER, + /* Must be last. */ + PAN_KMOD_PERF_CAT_COUNT, +}; + +/* Describes the memory layout of a buffer containing performance counters. + * The buffer is structured like this: + * sample { + * header + * categories [ category { + * blocks [ block { + * header + * samples + * }] + * }] + * } + */ +struct pan_kmod_perf_buffer_layout { + struct { + /* Offset from the start of the buffer in bytes. */ + uint32_t offset; + /* Number of blocks for this category. */ + uint8_t n_blocks; + } category[PAN_KMOD_PERF_CAT_COUNT]; + + uint32_t block_stride; + uint32_t counter_stride; + uint32_t counters_per_category; +}; + /* KMD backend vtable. * * All methods described there are mandatory, unless explicitly flagged as @@ -474,6 +527,25 @@ struct pan_kmod_ops { /* Label the BO */ void (*bo_set_label)(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const char *label); + + /* Initialize a perf session. */ + struct pan_kmod_perf_session *(*perf_create)(struct pan_kmod_dev *dev); + + /* Enable perf counters. */ + int (*perf_enable)(struct pan_kmod_perf_session *session); + + /* Disable perf counters. */ + int (*perf_disable)(struct pan_kmod_perf_session *session); + + /* Dump collected perf counters. */ + int (*perf_dump)(struct pan_kmod_perf_session *session); + + /* Destroy a perf session. 
*/ + void (*perf_destroy)(struct pan_kmod_perf_session *session); + + /* Query the memory layout for a counter buffer. */ + void (*perf_query_layout)(const struct pan_kmod_perf_session *session, + struct pan_kmod_perf_buffer_layout *layout); }; /* KMD information. */ @@ -783,6 +855,64 @@ pan_kmod_query_timestamp(const struct pan_kmod_dev *dev) return dev->ops->query_timestamp(dev); } +static inline struct pan_kmod_perf_session * +pan_kmod_perf_create(struct pan_kmod_dev *dev) +{ + return dev->ops->perf_create(dev); +} + +static inline int +pan_kmod_perf_enable(struct pan_kmod_perf_session *session) +{ + return session->dev->ops->perf_enable(session); +} + +static inline int +pan_kmod_perf_disable(struct pan_kmod_perf_session *session) +{ + return session->dev->ops->perf_disable(session); +} + +static inline int +pan_kmod_perf_dump(struct pan_kmod_perf_session *session) +{ + return session->dev->ops->perf_dump(session); +} + +static inline void +pan_kmod_perf_destroy(struct pan_kmod_perf_session *session) +{ + session->dev->ops->perf_destroy(session); +} + +static inline void +pan_kmod_perf_query_layout(const struct pan_kmod_perf_session *session, + struct pan_kmod_perf_buffer_layout *layout) +{ + session->dev->ops->perf_query_layout(session, layout); +} + +/* Load a counter value from the given address. */ +static inline int64_t +pan_kmod_perf_load_counter(const struct pan_kmod_perf_session *session, + const void *ptr) +{ + if (pan_arch(session->dev->props.gpu_id) < 10) + return *((const uint32_t*)ptr); + else { + const uint64_t val = *((const uint64_t*)ptr); +#ifndef NDEBUG + /* + * Even though the uAPI permits 64-bit unsigned counters, the counter + * values realistically never exceed INT64_MAX. + */ + return (val > INT64_MAX) ? 
-EINVAL : val; +#else + return val; +#endif + } +} + #if defined(__cplusplus) } // extern "C" #endif diff --git a/src/panfrost/lib/kmod/panfrost_kmod.c b/src/panfrost/lib/kmod/panfrost_kmod.c index a3d8900c251..67235ad15cf 100644 --- a/src/panfrost/lib/kmod/panfrost_kmod.c +++ b/src/panfrost/lib/kmod/panfrost_kmod.c @@ -42,6 +42,10 @@ struct panfrost_kmod_bo { uint64_t offset; }; +struct panfrost_kmod_perf_session { + struct pan_kmod_perf_session base; +}; + /* Abstraction over the raw drm_panfrost_get_param ioctl for fetching * information about devices. */ @@ -163,6 +167,8 @@ panfrost_dev_query_props(struct panfrost_kmod_dev *panfrost_dev) panfrost_query_raw(fd, DRM_PANFROST_PARAM_MEM_FEATURES, true, 0); props->mmu_features = panfrost_query_raw(fd, DRM_PANFROST_PARAM_MMU_FEATURES, true, 0); + props->l2_features = + panfrost_query_raw(fd, DRM_PANFROST_PARAM_L2_FEATURES, true, 0); for (unsigned i = 0; i < ARRAY_SIZE(props->texture_features); i++) { props->texture_features[i] = panfrost_query_raw( @@ -607,6 +613,106 @@ panfrost_kmod_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const c mesa_loge("DRM_IOCTL_PANFROST_SET_LABEL_BO failed (err=%d)", errno); } +static inline struct pan_kmod_perf_session * +panfrost_kmod_perf_init(struct pan_kmod_dev *dev) +{ + UNUSED struct panfrost_kmod_dev *panfrost_dev = + container_of(dev, struct panfrost_kmod_dev, base); + + struct panfrost_kmod_perf_session *sess = + pan_kmod_dev_alloc(dev, sizeof(*sess)); + if (!sess) { + mesa_loge("failed to allocate a panfrost_kmod_perf_session object"); + return NULL; + } + + sess->base.dev = dev; + + struct pan_kmod_perf_buffer_layout layout; + pan_kmod_perf_query_layout(&sess->base, &layout); + + uint32_t n_counters = 0; + for (uint32_t cat = 0; cat < PAN_KMOD_PERF_CAT_COUNT; ++cat) + n_counters += layout.category[cat].n_blocks * layout.counters_per_category; + + uint32_t* counter_values = pan_kmod_dev_alloc(dev, sizeof(uint32_t) * n_counters); + sess->base.data = counter_values; 
+ sess->base.data_ts_supported = false; + + mesa_logd("perf session created"); + + return &(sess->base); +} + +static int +panfrost_kmod_perf_query(struct pan_kmod_perf_session *session, uint32_t enable) +{ + struct drm_panfrost_perfcnt_enable perfcnt_enable = {enable, 0}; + return pan_kmod_ioctl(session->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_ENABLE, + &perfcnt_enable); +} + +static int +panfrost_kmod_perf_enable(struct pan_kmod_perf_session *session) +{ + return panfrost_kmod_perf_query(session, 1 /* enable */); +} + +static int +panfrost_kmod_perf_disable(struct pan_kmod_perf_session *session) +{ + return panfrost_kmod_perf_query(session, 0 /* disable */); +} + +static int +panfrost_kmod_perf_dump(struct pan_kmod_perf_session *session) +{ + struct drm_panfrost_perfcnt_dump perfcnt_dump = { + (uint64_t)(uintptr_t)session->data}; + return pan_kmod_ioctl(session->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_DUMP, + &perfcnt_dump); +} + +static void +panfrost_kmod_perf_query_layout(const struct pan_kmod_perf_session *session, + struct pan_kmod_perf_buffer_layout *layout) +{ + /* Generally counter blocks are laid out in the following order: + * Job manager, tiler, one or more L2 caches, and one or more shader cores. + */ + unsigned l2_slices = pan_query_l2_slices(&session->dev->props); + unsigned core_id_range; + pan_query_core_count(&session->dev->props, &core_id_range); + + /* On all Bifrost architectures this is 64. 
*/ + const unsigned counters_per_cat = 64; + layout->counters_per_category = counters_per_cat; + layout->counter_stride = sizeof(uint32_t); + layout->block_stride = counters_per_cat * sizeof(uint32_t); + + /* Setup the layout */ + layout->category[PAN_KMOD_PERF_CAT_FRONTEND].n_blocks = 1; + layout->category[PAN_KMOD_PERF_CAT_TILER].n_blocks = 1; + layout->category[PAN_KMOD_PERF_CAT_MEMSYS].n_blocks = l2_slices; + layout->category[PAN_KMOD_PERF_CAT_SHADER].n_blocks = core_id_range; + + layout->category[0].offset = 0; + for (unsigned cat_idx = 1; cat_idx < PAN_KMOD_PERF_CAT_COUNT; ++cat_idx) { + layout->category[cat_idx].offset = + layout->category[cat_idx - 1].offset + + layout->category[cat_idx - 1].n_blocks * counters_per_cat; + } +} + +static void +panfrost_kmod_perf_destroy(struct pan_kmod_perf_session *session) +{ + if (session->data) + pan_kmod_dev_free(session->dev, session->data); + pan_kmod_dev_free(session->dev, session); + mesa_logd("perf session destroyed"); +} + const struct pan_kmod_ops panfrost_kmod_ops = { .dev_create = panfrost_kmod_dev_create, .dev_destroy = panfrost_kmod_dev_destroy, @@ -624,4 +730,10 @@ const struct pan_kmod_ops panfrost_kmod_ops = { .vm_bind = panfrost_kmod_vm_bind, .query_timestamp = panfrost_kmod_query_timestamp, .bo_set_label = panfrost_kmod_bo_label, + .perf_create = panfrost_kmod_perf_init, + .perf_enable = panfrost_kmod_perf_enable, + .perf_disable = panfrost_kmod_perf_disable, + .perf_dump = panfrost_kmod_perf_dump, + .perf_query_layout = panfrost_kmod_perf_query_layout, + .perf_destroy = panfrost_kmod_perf_destroy, }; diff --git a/src/panfrost/lib/kmod/panthor_kmod.c b/src/panfrost/lib/kmod/panthor_kmod.c index 58e5d67032a..c86cd237466 100644 --- a/src/panfrost/lib/kmod/panthor_kmod.c +++ b/src/panfrost/lib/kmod/panthor_kmod.c @@ -1,5 +1,6 @@ /* * Copyright © 2023 Collabora, Ltd. + * Copyright © 2026 Arm, Ltd. 
* SPDX-License-Identifier: MIT */ @@ -7,6 +8,7 @@ #include #include #include +#include #include "util/hash_table.h" #include "util/libsync.h" @@ -20,6 +22,8 @@ #include "drm-uapi/dma-buf.h" #include "drm-uapi/panthor_drm.h" +#include "util/timespec.h" + #include "pan_kmod_backend.h" #include "pan_props.h" @@ -103,6 +107,43 @@ struct panthor_kmod_bo { } sync; }; +struct panthor_kmod_perf_session { + struct pan_kmod_perf_session base; + + struct { + int event; + } fds; + int session_handle; + + struct { + int ringbuf; + int control; + } bos; + + struct { + size_t sample; + size_t block; + size_t ringbuf; + size_t control; + size_t sample_header; + size_t block_header; + } sizes; + + struct { + size_t cshw_blocks; + size_t tiler_blocks; + size_t memsys_blocks; + size_t shader_blocks; + } config; + + bool session_initialized; + bool active; + uint8_t set; + uint64_t sample_idx; + uint8_t *ringbuffer; + struct drm_panthor_perf_ringbuf_control *ctrl; +}; + static uint32_t to_kmod_group_allow_priority_flags(uint32_t panthor_flags) { @@ -160,6 +201,7 @@ panthor_dev_query_props(struct panthor_kmod_dev *panthor_dev) .tiler_features = panthor_dev->props.gpu.tiler_features, .mem_features = panthor_dev->props.gpu.mem_features, .mmu_features = panthor_dev->props.gpu.mmu_features, + .l2_features = panthor_dev->props.gpu.l2_features, /* This register does not exist because AFBC is no longer optional. 
*/ .afbc_features = 0, @@ -1302,6 +1344,472 @@ panthor_kmod_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const ch mesa_loge("DRM_IOCTL_PANTHOR_BO_SET_LABEL failed (err=%d)", errno); } +/* ================ PERF COUNTERS ================= */ + +#define PANTHOR_SAMPLE_SLOTS (32) +#define PANTHOR_POLL_TIMEOUT_SEC (10) +#define PTR_TO_U64(ptr) ((uint64_t)(uintptr_t)(ptr)) + +struct panthor_perf_sample { + struct drm_panthor_perf_sample_header sample_header; + uint8_t bytes[]; +}; + +static int +perf_cmd_setup(int fd, int eventfd, int ringbuf_handle, int control_handle, uint8_t set) +{ + struct drm_panthor_perf_cmd_setup setup = { + .fd = eventfd, + .block_set = set, + .ringbuf_handle = ringbuf_handle, + .control_handle = control_handle, + .sample_slots = PANTHOR_SAMPLE_SLOTS, + .cshw_enable_mask = { UINT64_MAX, UINT64_MAX }, + .tiler_enable_mask = { UINT64_MAX, UINT64_MAX }, + .memsys_enable_mask = { UINT64_MAX, UINT64_MAX }, + .shader_enable_mask = { UINT64_MAX, UINT64_MAX }, + }; + + struct drm_panthor_perf_control ctrl = { + .cmd = DRM_PANTHOR_PERF_COMMAND_SETUP, + .size = sizeof(setup), + .pointer = PTR_TO_U64(&setup), + }; + + return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl); +} + +static int +perf_cmd_start(int fd, int sid, uint64_t user_data) +{ + struct drm_panthor_perf_cmd_start start = { + .user_data = user_data, + }; + + struct drm_panthor_perf_control ctrl = { + .cmd = DRM_PANTHOR_PERF_COMMAND_START, + .handle = sid, + .size = sizeof(start), + .pointer = PTR_TO_U64(&start), + }; + + return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl); +} + +static int +perf_cmd_stop(int fd, int sid, uint64_t user_data) +{ + struct drm_panthor_perf_cmd_stop stop = {}; + + struct drm_panthor_perf_control ctrl = { + .cmd = DRM_PANTHOR_PERF_COMMAND_STOP, + .handle = sid, + .size = sizeof(stop), + .pointer = PTR_TO_U64(&stop), + }; + + return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl); +} + +static int +perf_cmd_sample(int fd, int sid, 
uint64_t user_data) +{ + struct drm_panthor_perf_cmd_sample sample = { + .user_data = user_data, + }; + + struct drm_panthor_perf_control ctrl = { + .cmd = DRM_PANTHOR_PERF_COMMAND_SAMPLE, + .handle = sid, + .size = sizeof(sample), + .pointer = PTR_TO_U64(&sample), + }; + + return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl); +} + +#define DUMMY_PTR ((uint8_t *)1) + +static int +perf_cmd_teardown(int fd, int sid) +{ + struct drm_panthor_perf_control ctrl = { + .cmd = DRM_PANTHOR_PERF_COMMAND_TEARDOWN, + .handle = sid, + }; + + int ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl); + + return ret; +} + +static int +unmap_and_teardown_bo(int fd, int handle, void *addr, size_t size) +{ + if (addr) + munmap(addr, size); + + struct drm_gem_close ringbuf_close = { + .handle = handle, + }; + return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &ringbuf_close); +} + +static int +create_and_map_bo(int fd, size_t size, int *handle, void **mapping) +{ + struct drm_panthor_bo_create bo = { + .size = size, + }; + int ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_BO_CREATE, &bo); + if (ret) + return -EINVAL; + + struct drm_panthor_bo_mmap_offset offset = { + .handle = bo.handle, + }; + ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET, &offset); + if (ret) + goto term_bo; + + void *map = mmap(0, bo.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, (off_t)offset.offset); + if (!map || map == MAP_FAILED) { + ret = -EINVAL; + goto term_bo; + } + + *handle = bo.handle; + *mapping = map; + return 0; +term_bo: + unmap_and_teardown_bo(fd, bo.handle, NULL, 0); /* best-effort cleanup of the half-created BO */ + return ret; /* report the original failure, not the GEM_CLOSE result (0 on success) */ +} + +static int +poll_for_sample(int poll_fd) +{ + int ret; + eventfd_t tmp; + struct pollfd pfd[1] = { + { + .fd = poll_fd, + .events = POLLIN + } + }; + struct timespec timeout = { + .tv_sec = PANTHOR_POLL_TIMEOUT_SEC, + }; + struct timespec now, result, deadline; + + clock_gettime(CLOCK_MONOTONIC, &now); + timespec_add(&deadline, &now, &timeout); + + do { + clock_gettime(CLOCK_MONOTONIC, &now); + 
timespec_sub_saturate(&result, &deadline, &now); + ret = ppoll(pfd, 1, &result, NULL); + } while (ret == -1 && errno == EINTR); + + if (ret <= 0) /* 0 means ppoll() timed out; eventfd_read() on the blocking eventfd would hang */ + return ret ? ret : -ETIMEDOUT; + + return eventfd_read(poll_fd, &tmp); +} + +static uint64_t +read_extract_idx(struct panthor_kmod_perf_session *perf) +{ + return p_atomic_read(&perf->ctrl->extract_idx); +} + +static void +write_extract_idx(struct panthor_kmod_perf_session *perf, uint64_t idx) +{ + p_atomic_set(&perf->ctrl->extract_idx, idx); +} + +static uint64_t +read_insert_idx(struct panthor_kmod_perf_session *perf) +{ + return p_atomic_read(&perf->ctrl->insert_idx); +} + +static inline struct pan_kmod_perf_session * +panthor_kmod_perf_init(struct pan_kmod_dev *dev) +{ + UNUSED struct panthor_kmod_dev *panthor_dev = + container_of(dev, struct panthor_kmod_dev, base); + + struct panthor_kmod_perf_session *sess = + pan_kmod_dev_alloc(dev, sizeof(*sess)); + if (!sess) { + mesa_loge("failed to allocate a panthor_kmod_perf_session object"); + return NULL; + } + + sess->base.dev = dev; + + struct drm_panthor_gpu_info gpu_info = {}; + struct drm_panthor_dev_query query = { + .type = DRM_PANTHOR_DEV_QUERY_GPU_INFO, + .size = sizeof(gpu_info), + .pointer = (uint64_t)(uintptr_t)&gpu_info, + }; + + int ret = drmIoctl(dev->fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query); + if (ret) + goto free_perf; + + struct drm_panthor_perf_info perf_info = {}; + + query = (struct drm_panthor_dev_query) { + .type = DRM_PANTHOR_DEV_QUERY_PERF_INFO, + .size = sizeof(perf_info), + .pointer = (uint64_t)(uintptr_t)&perf_info, + }; + + ret = drmIoctl(dev->fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query); + if (ret) + goto free_perf; + + sess->fds.event = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE); + if (sess->fds.event < 0) /* eventfd() returns -1 on failure; 0 is a valid descriptor */ + goto free_perf; + + const size_t block_size = perf_info.counters_per_block * sizeof(uint64_t) + + perf_info.block_header_size; + const size_t sample_size = perf_info.sample_size; + const size_t buffer_size = sample_size * PANTHOR_SAMPLE_SLOTS; + + sess->sizes.block 
= block_size; + sess->sizes.sample = sample_size; + sess->sizes.ringbuf = buffer_size; + sess->sizes.control = sizeof(*sess->ctrl); + sess->sizes.sample_header = perf_info.sample_header_size; + sess->sizes.block_header = perf_info.block_header_size; + + if (sess->sizes.sample_header != sizeof(struct drm_panthor_perf_sample_header)) + fprintf(stderr, "panfrost perf sample header size mismatch!"); + + if (sess->sizes.block_header != sizeof(struct drm_panthor_perf_block_header)) + fprintf(stderr, "panfrost perf block header size mismatch!"); + + sess->config.cshw_blocks = perf_info.cshw_blocks; + sess->config.tiler_blocks = perf_info.tiler_blocks; + sess->config.memsys_blocks = perf_info.memsys_blocks; + sess->config.shader_blocks = perf_info.shader_blocks; + + void *buf_map; + ret = create_and_map_bo(dev->fd, sess->sizes.ringbuf, &sess->bos.ringbuf, &buf_map); + if (ret) + goto free_eventfd; + + sess->ringbuffer = buf_map; + sess->base.data = buf_map; + sess->base.data_ts_supported = true; + + void *control_map; + ret = create_and_map_bo(dev->fd, sess->sizes.control, &sess->bos.control, &control_map); + if (ret) + goto free_ringbuf; + + sess->ctrl = (struct drm_panthor_perf_ringbuf_control *)control_map; + + sess->set = 0; /* TODO should we make it configurable? */ + sess->active = false; + sess->session_initialized = false; + + return &(sess->base); + +free_ringbuf: + unmap_and_teardown_bo(dev->fd, sess->bos.ringbuf, buf_map, sess->sizes.ringbuf); +free_eventfd: + close(sess->fds.event); +free_perf: + pan_kmod_dev_free(dev, sess); /* sess was obtained from pan_kmod_dev_alloc(), so release it the same way */ + return NULL; +} + +static int +panthor_kmod_perf_enable(struct pan_kmod_perf_session *session) +{ + UNUSED struct panthor_kmod_perf_session *psess = + container_of(session, struct panthor_kmod_perf_session, base); + + /* The session cannot be created outside of the sampling thread. 
*/ + if (!psess->session_initialized) { + int session_handle = perf_cmd_setup(psess->base.dev->fd, psess->fds.event, psess->bos.ringbuf, + psess->bos.control, psess->set); + + if (session_handle < 0) + return -EINVAL; + + psess->session_handle = session_handle; + psess->session_initialized = true; + } + + int ret = perf_cmd_start(psess->base.dev->fd, psess->session_handle, psess->sample_idx++); + if (ret) + return ret; + + psess->active = true; + + return 0; +} + +static int +panthor_kmod_perf_disable(struct pan_kmod_perf_session *session) +{ + UNUSED struct panthor_kmod_perf_session *sess = + container_of(session, struct panthor_kmod_perf_session, base); + + int ret = perf_cmd_stop(sess->base.dev->fd, sess->session_handle, sess->sample_idx++); + if (ret) + return ret; + + sess->active = false; + + ret = poll_for_sample(sess->fds.event); + if (ret) + return ret; + + return 0; +} + +static int +panthor_perf_sample(struct panthor_kmod_perf_session *perf) +{ + const uint64_t insert_idx = read_insert_idx(perf); + const uint64_t extract_idx = read_extract_idx(perf); + + // If there's an outstanding sample, discard it + if (insert_idx != extract_idx) + write_extract_idx(perf, insert_idx); + + // Otherwise, request a new sample which will increment the insert idx + int ret = perf_cmd_sample(perf->base.dev->fd, perf->session_handle, perf->sample_idx++); + if (ret) + return ret; + + ret = poll_for_sample(perf->fds.event); + if (ret) + return ret; + + return 0; +} + +static uint8_t *get_base_addr(uint8_t *buf, size_t idx, size_t stride) +{ + return buf + idx * stride; +} + +static inline struct panthor_perf_sample *perf_sample_idx(struct panthor_kmod_perf_session *perf, uint64_t idx) +{ + return (struct panthor_perf_sample *)get_base_addr(perf->ringbuffer, idx, perf->sizes.sample); +} + +static uint64_t +panthor_perf_get_sample_timestamp(struct panthor_kmod_perf_session *perf) +{ + const uint64_t extract_idx = read_extract_idx(perf); + const struct panthor_perf_sample 
*sample = perf_sample_idx(perf, extract_idx); + + return sample->sample_header.timestamp_end_ns; +} + +static int +panthor_kmod_perf_dump(struct pan_kmod_perf_session *session) +{ + UNUSED struct panthor_kmod_perf_session *psess = + container_of(session, struct panthor_kmod_perf_session, base); + + int ret = panthor_perf_sample(psess); + if (ret) + return ret; + + /* Update data pointer to the correct spot in the ringbuffer. */ + session->data = perf_sample_idx(psess, read_extract_idx(psess)); + session->data_ts = panthor_perf_get_sample_timestamp(psess); + + return 0; +} + +static void +panthor_kmod_perf_query_layout(const struct pan_kmod_perf_session *session, + struct pan_kmod_perf_buffer_layout *layout) +{ + UNUSED struct panthor_kmod_perf_session *psess = + container_of(session, struct panthor_kmod_perf_session, base); + + /* On all Valhall architectures this is 128. */ + const unsigned counters_per_cat = 128; + layout->counters_per_category = counters_per_cat; + + layout->block_stride = psess->sizes.block; + layout->counter_stride = sizeof(uint64_t); + + /* Setup the layout */ + layout->category[PAN_KMOD_PERF_CAT_FRONTEND].n_blocks = psess->config.cshw_blocks; + layout->category[PAN_KMOD_PERF_CAT_TILER].n_blocks = psess->config.tiler_blocks; + layout->category[PAN_KMOD_PERF_CAT_MEMSYS].n_blocks = psess->config.memsys_blocks; + layout->category[PAN_KMOD_PERF_CAT_SHADER].n_blocks = psess->config.shader_blocks; + + layout->category[0].offset = + psess->sizes.sample_header + psess->sizes.block_header; + for (unsigned cat_idx = 1; cat_idx < PAN_KMOD_PERF_CAT_COUNT; ++cat_idx) { + layout->category[cat_idx].offset = + layout->category[cat_idx - 1].offset + + layout->category[cat_idx - 1].n_blocks * layout->block_stride; + } +} + +static int +panthor_perf_stop(struct panthor_kmod_perf_session *perf) +{ + int ret = perf_cmd_stop(perf->base.dev->fd, perf->session_handle, perf->sample_idx++); + if (ret) + return ret; + + perf->active = false; + + ret = 
poll_for_sample(perf->fds.event); + if (ret) + return ret; + + return 0; +} + +static void +panthor_kmod_perf_destroy(struct pan_kmod_perf_session *session) +{ + UNUSED struct panthor_kmod_perf_session *psess = + container_of(session, struct panthor_kmod_perf_session, base); + + int ret; + + if (psess->active) { + ret = panthor_perf_stop(psess); + assert(ret == 0); + } + + ret = perf_cmd_teardown(psess->base.dev->fd, psess->session_handle); + assert(ret == 0); + + ret = unmap_and_teardown_bo(psess->base.dev->fd, psess->bos.ringbuf, psess->ringbuffer, psess->sizes.ringbuf); + assert(ret == 0); + + ret = unmap_and_teardown_bo(psess->base.dev->fd, psess->bos.control, psess->ctrl, psess->sizes.control); + assert(ret == 0); + + close(psess->fds.event); + /* psess and session are the same allocation (base is the first member): free it exactly once, below. */ + + pan_kmod_dev_free(session->dev, session); + + mesa_logd("perf session destroyed"); +} + const struct pan_kmod_ops panthor_kmod_ops = { .dev_create = panthor_kmod_dev_create, .dev_destroy = panthor_kmod_dev_destroy, @@ -1319,4 +1827,10 @@ const struct pan_kmod_ops panthor_kmod_ops = { .vm_query_state = panthor_kmod_vm_query_state, .query_timestamp = panthor_kmod_query_timestamp, .bo_set_label = panthor_kmod_bo_label, + .perf_create = panthor_kmod_perf_init, + .perf_enable = panthor_kmod_perf_enable, + .perf_disable = panthor_kmod_perf_disable, + .perf_dump = panthor_kmod_perf_dump, + .perf_query_layout = panthor_kmod_perf_query_layout, + .perf_destroy = panthor_kmod_perf_destroy, }; diff --git a/src/panfrost/lib/pan_props.c b/src/panfrost/lib/pan_props.c index 056bd48d4a2..26babd143a1 100644 --- a/src/panfrost/lib/pan_props.c +++ b/src/panfrost/lib/pan_props.c @@ -17,6 +17,13 @@ pan_query_l2_slices(const struct pan_kmod_dev_props *props) return ((props->mem_features >> 8) & 0xF) + 1; } +unsigned +pan_query_bus_width(const struct pan_kmod_dev_props *props) +{ + /* BUS_WIDTH is L2_FEATURES[31:24] log2 */ + return 1 << ((props->l2_features >> 24) & 0xFF); +} + struct pan_tiler_features 
pan_query_tiler_features(const struct pan_kmod_dev_props *props) { diff --git a/src/panfrost/lib/pan_props.h b/src/panfrost/lib/pan_props.h index 19d3b749735..cebf5616028 100644 --- a/src/panfrost/lib/pan_props.h +++ b/src/panfrost/lib/pan_props.h @@ -21,6 +21,8 @@ struct pan_kmod_vm; unsigned pan_query_l2_slices(const struct pan_kmod_dev_props *props); +unsigned pan_query_bus_width(const struct pan_kmod_dev_props *props); + struct pan_tiler_features pan_query_tiler_features(const struct pan_kmod_dev_props *props); diff --git a/src/panfrost/model/pan_model.c b/src/panfrost/model/pan_model.c index f9861ace8dc..db010f45681 100644 --- a/src/panfrost/model/pan_model.c +++ b/src/panfrost/model/pan_model.c @@ -66,34 +66,34 @@ const struct pan_model pan_model_list[] = { MIDGARD_MODEL(0x860, "T860", "T86x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192, 8192)), MIDGARD_MODEL(0x880, "T880", "T88x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192, 8192)), - BIFROST_MODEL(PAN_PROD_ID(6, 0, 0), "G71", "TMIx", MODEL_ANISO(NONE), MODEL_TB_SIZES( 4096, 4096)), - BIFROST_MODEL(PAN_PROD_ID(6, 2, 1), "G72", "THEx", MODEL_ANISO(R0P3), MODEL_TB_SIZES( 8192, 4096)), - BIFROST_MODEL(PAN_PROD_ID(7, 0, 0), "G51", "TSIx", MODEL_ANISO(R1P1), MODEL_TB_SIZES( 8192, 8192)), - BIFROST_MODEL(PAN_PROD_ID(7, 0, 3), "G31", "TDVx", MODEL_ANISO(ALL), MODEL_TB_SIZES( 8192, 8192)), - BIFROST_MODEL(PAN_PROD_ID(7, 2, 1), "G76", "TNOx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)), - BIFROST_MODEL(PAN_PROD_ID(7, 2, 2), "G52", "TGOx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)), - BIFROST_MODEL(PAN_PROD_ID(7, 4, 2), "G52 r1", "TGOx", MODEL_ANISO(ALL), MODEL_TB_SIZES( 8192, 8192)), + BIFROST_MODEL(PAN_PROD_ID(6, 0, 0), "G71", "G71", MODEL_ANISO(NONE), MODEL_TB_SIZES( 4096, 4096)), + BIFROST_MODEL(PAN_PROD_ID(6, 2, 1), "G72", "G72", MODEL_ANISO(R0P3), MODEL_TB_SIZES( 8192, 4096)), + BIFROST_MODEL(PAN_PROD_ID(7, 0, 0), "G51", "G51", MODEL_ANISO(R1P1), MODEL_TB_SIZES( 8192, 8192)), + BIFROST_MODEL(PAN_PROD_ID(7, 0, 3), 
"G31", "G31", MODEL_ANISO(ALL), MODEL_TB_SIZES( 8192, 8192)), + BIFROST_MODEL(PAN_PROD_ID(7, 2, 1), "G76", "G76", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)), + BIFROST_MODEL(PAN_PROD_ID(7, 2, 2), "G52", "G52", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)), + BIFROST_MODEL(PAN_PROD_ID(7, 4, 2), "G52 r1", "G52", MODEL_ANISO(ALL), MODEL_TB_SIZES( 8192, 8192)), - VALHALL_MODEL(PAN_PROD_ID(9, 0, 1), 0, "G57", "TNAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), + VALHALL_MODEL(PAN_PROD_ID(9, 0, 1), 0, "G57", "G77", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), MODEL_RATES(2, 4, 32)), - VALHALL_MODEL(PAN_PROD_ID(9, 0, 3), 0, "G57", "TNAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), + VALHALL_MODEL(PAN_PROD_ID(9, 0, 3), 0, "G57", "G77", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), MODEL_RATES(2, 4, 32)), - VALHALL_MODEL(PAN_PROD_ID(10, 8, 7), 0, "G610", "TVIx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384), + VALHALL_MODEL(PAN_PROD_ID(10, 8, 7), 0, "G610", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384), MODEL_RATES(4, 8, 64)), - VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 0, "G310v1", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), + VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 0, "G310v1", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), MODEL_RATES(2, 2, 16)), - VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 1, "G310v2", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), + VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 1, "G310v2", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), MODEL_RATES(2, 4, 32)), - VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 2, "G310v3", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), + VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 2, "G310v3", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), MODEL_RATES(4, 4, 48)), - VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 3, "G310v4", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384), + VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 3, "G310v4", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384), 
MODEL_RATES(4, 8, 48)), - VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 4, "G310v5", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384), + VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 4, "G310v5", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384), MODEL_RATES(4, 8, 64)), - FIFTHGEN_MODEL(PAN_PROD_ID(12, 8, 0), 4, "G720", "TTIx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 32768), + FIFTHGEN_MODEL(PAN_PROD_ID(12, 8, 0), 4, "G720", "G720", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 32768), MODEL_RATES(4, 8, 128)), - FIFTHGEN_MODEL(PAN_PROD_ID(13, 8, 0), 4, "G725", "TKRx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536), + FIFTHGEN_MODEL(PAN_PROD_ID(13, 8, 0), 4, "G725", "G725", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536), MODEL_RATES(4, 8, 128)), }; /* clang-format on */ diff --git a/src/panfrost/perf/G31.xml b/src/panfrost/perf/G31.xml deleted file mode 100644 index 0cb8d3fabd8..00000000000 --- a/src/panfrost/perf/G31.xml +++ /dev/null @@ -1,158 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G51.xml b/src/panfrost/perf/G51.xml deleted file mode 100644 index 2ee1958145b..00000000000 --- a/src/panfrost/perf/G51.xml +++ /dev/null @@ -1,158 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G52.xml b/src/panfrost/perf/G52.xml deleted file mode 100644 index e42dfdb2d8d..00000000000 --- a/src/panfrost/perf/G52.xml +++ /dev/null @@ -1,160 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G57.xml b/src/panfrost/perf/G57.xml deleted file mode 100644 index 2c5e843a6f2..00000000000 --- a/src/panfrost/perf/G57.xml +++ /dev/null @@ -1,160 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G68.xml b/src/panfrost/perf/G68.xml deleted file mode 100644 index ef14f91462e..00000000000 --- a/src/panfrost/perf/G68.xml +++ /dev/null @@ -1,160 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G71.xml b/src/panfrost/perf/G71.xml deleted file mode 100644 index 6080c5eeb59..00000000000 --- a/src/panfrost/perf/G71.xml +++ /dev/null @@ -1,158 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G72.xml b/src/panfrost/perf/G72.xml deleted file mode 100644 index 89c3a118dce..00000000000 --- a/src/panfrost/perf/G72.xml +++ /dev/null @@ -1,158 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G76.xml b/src/panfrost/perf/G76.xml deleted file mode 100644 index 5777a64c7ea..00000000000 --- a/src/panfrost/perf/G76.xml +++ /dev/null @@ -1,160 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G77.xml b/src/panfrost/perf/G77.xml deleted file mode 100644 index b107ae96ba0..00000000000 --- a/src/panfrost/perf/G77.xml +++ /dev/null @@ -1,160 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G78.xml b/src/panfrost/perf/G78.xml deleted file mode 100644 index b0093ef0cc1..00000000000 --- a/src/panfrost/perf/G78.xml +++ /dev/null @@ -1,160 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/generated/G1.xml b/src/panfrost/perf/generated/G1.xml new file mode 100644 index 00000000000..3a59e3d8998 --- /dev/null +++ b/src/panfrost/perf/generated/G1.xml @@ -0,0 +1,345 @@ + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G31.xml b/src/panfrost/perf/generated/G31.xml new file mode 100644 index 00000000000..326c06bc175 --- /dev/null +++ b/src/panfrost/perf/generated/G31.xml @@ -0,0 +1,247 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G51.xml b/src/panfrost/perf/generated/G51.xml new file mode 100644 index 00000000000..661ce6a6b1a --- /dev/null +++ b/src/panfrost/perf/generated/G51.xml @@ -0,0 +1,247 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + diff --git a/src/panfrost/perf/generated/G52.xml b/src/panfrost/perf/generated/G52.xml new file mode 100644 index 00000000000..a2ab269e4d6 --- /dev/null +++ b/src/panfrost/perf/generated/G52.xml @@ -0,0 +1,251 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G71.xml b/src/panfrost/perf/generated/G71.xml new file mode 100644 index 00000000000..36817506f30 --- /dev/null +++ b/src/panfrost/perf/generated/G71.xml @@ -0,0 +1,247 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G710.xml b/src/panfrost/perf/generated/G710.xml new file mode 100644 index 00000000000..a67dad5b183 --- /dev/null +++ b/src/panfrost/perf/generated/G710.xml @@ -0,0 +1,272 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G715.xml b/src/panfrost/perf/generated/G715.xml new file mode 100644 index 00000000000..e8fa0bb074f --- /dev/null +++ b/src/panfrost/perf/generated/G715.xml @@ -0,0 +1,294 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G72.xml b/src/panfrost/perf/generated/G72.xml new file mode 100644 index 00000000000..0853083d49d --- /dev/null +++ b/src/panfrost/perf/generated/G72.xml @@ -0,0 +1,247 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G720.xml b/src/panfrost/perf/generated/G720.xml new file mode 100644 index 00000000000..93255e21e93 --- /dev/null +++ b/src/panfrost/perf/generated/G720.xml @@ -0,0 +1,303 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G725.xml b/src/panfrost/perf/generated/G725.xml new file mode 100644 index 00000000000..650061ac1c1 --- /dev/null +++ b/src/panfrost/perf/generated/G725.xml @@ -0,0 +1,335 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G76.xml b/src/panfrost/perf/generated/G76.xml new file mode 100644 index 00000000000..6387a90613c --- /dev/null +++ b/src/panfrost/perf/generated/G76.xml @@ -0,0 +1,251 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/src/panfrost/perf/generated/G77.xml b/src/panfrost/perf/generated/G77.xml new file mode 100644 index 00000000000..e0115f7b4e1 --- /dev/null +++ b/src/panfrost/perf/generated/G77.xml @@ -0,0 +1,259 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G78.xml b/src/panfrost/perf/generated/G78.xml new file mode 100644 index 00000000000..87e05167548 --- /dev/null +++ b/src/panfrost/perf/generated/G78.xml @@ -0,0 +1,261 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/meson.build b/src/panfrost/perf/meson.build index 2f8257f1875..f16927dadf5 100644 --- a/src/panfrost/perf/meson.build +++ b/src/panfrost/perf/meson.build @@ -3,8 +3,20 @@ # SPDX-License-Identifier: MIT pan_hw_metrics = [ - 'G31', 'G51', 'G52', 'G57', 'G68', 'G71', 'G72', 'G76', 'G77', - 'G78', 'T72x', 'T76x', 'T82x', 'T83x', 'T86x', 'T88x', + 'T72x', 'T76x', 'T82x', 'T83x', 'T86x', 'T88x', + 'generated/G31', + 'generated/G51', + 'generated/G52', + 'generated/G71', + 'generated/G72', + 'generated/G76', + 'generated/G77', + 'generated/G78', + 
'generated/G710', + 'generated/G715', + 'generated/G720', + 'generated/G725', + 'generated/G1', ] pan_hw_metrics_xml_files = [] diff --git a/src/panfrost/perf/pan_gen_perf.py b/src/panfrost/perf/pan_gen_perf.py index f21a2589284..4f240e8c408 100644 --- a/src/panfrost/perf/pan_gen_perf.py +++ b/src/panfrost/perf/pan_gen_perf.py @@ -4,9 +4,15 @@ import argparse import textwrap import os +import datetime +import re +from dataclasses import dataclass +from typing import ClassVar import xml.etree.ElementTree as et +TAB_SIZE = 3 + class SourceFile: def __init__(self, filename): @@ -14,7 +20,7 @@ class SourceFile: self._indent = 0 def write(self, *args): - code = ' '.join(map(str,args)) + code = ' '.join(map(str, args)) for line in code.splitlines(): text = ''.rjust(self._indent) + line self.file.write(text.rstrip() + "\n") @@ -26,6 +32,17 @@ class SourceFile: self._indent -= n +CATEGORY_IDX_REMAP = { + "GPU Front-end": "PAN_PERF_COUNTER_CAT_FRONTEND", + "Job Manager": "PAN_PERF_COUNTER_CAT_FRONTEND", + "CSF": "PAN_PERF_COUNTER_CAT_FRONTEND", + "Tiler": "PAN_PERF_COUNTER_CAT_TILER", + "Memory System" : "PAN_PERF_COUNTER_CAT_MEMSYS", + "L2 Cache": "PAN_PERF_COUNTER_CAT_MEMSYS", + "Shader Core": "PAN_PERF_COUNTER_CAT_SHADER", +} + + class Counter: # category Category owning the counter # xml XML representation of itself @@ -35,12 +52,18 @@ class Counter: self.name = self.xml.get("name") self.desc = self.xml.get("description") self.units = self.xml.get("units") - self.offset = int(self.xml.get("offset")) - self.underscore_name = self.xml.get("counter").lower() + self.equation = self.xml.get("equation") + self.offset = int(self.xml.get("offset") or 0) + self.underscore_name = (self.xml.get("counter") or "").lower() + self.source_name = self.xml.get("counter") or "" + self.equation_impl = None + + if self.units.endswith("/second"): + self.units = self.units.replace("/second", "_per_second") class Category: - # product Product owning the gategory + # product Product owning the 
category # xml XML representation of itself def __init__(self, product, xml): self.product = product @@ -60,6 +83,7 @@ class Product: self.filename = filename self.xml = et.parse(self.filename) self.name = self.xml.getroot().get('id') + assert(self.name is not None) self.id = self.name.lower() self.categories = [] @@ -67,6 +91,114 @@ class Product: self.categories.append(Category(self, xml_cat)) +@dataclass +class EquationImpl: + fname: str + body: str + counter: Counter + version: int = -1 + + impls: ClassVar[dict[str, dict[str, 'EquationImpl']]] = {} + + """We don't want duplicate methods wasting space, this makes sure there is + only one implementation for each variant of counter hardware locations. + """ + @classmethod + def get(cls, counter, all_counters): + + body = cls.generate_body(counter, all_counters) + + if counter.name not in cls.impls: + cls.impls[counter.name] = {} + + bucket = cls.impls[counter.name] + + if body not in bucket: + fname = f"compute_{counter.name.lower()}" + eq = EquationImpl(fname, body, counter) + eq.version = len(bucket.keys()) + bucket[body] = eq + + return bucket[body] + + @staticmethod + def generate_body(counter, counters): + eq = counter.equation + + vals = dict() + + for c in sorted(counters, key=lambda x: len(x.source_name), reverse=True): + if c.source_name == "" or c.source_name not in eq: + continue + + idx = len(vals) + cat_enum = CATEGORY_IDX_REMAP[c.category.name] + # MaliAnyUtil for example is from "Shader Core" but it reads GPU_ACTIVE + # which is from "Front-end". We can not use the block index from the + # shader core when reading a front-end counter. + # If reading from another block for the equation the only block index that + # makes sense is 0 because if the category had more than one block we + # could not know which one to choose. 
+ from_block = 'block' if c.category.name == counter.category.name else '0' + r = f"const double v{idx} = pan_perf_counter_read_raw(perf, {cat_enum}, {c.offset}, {from_block});" + vals[c.source_name] = (idx, r) + + eq = eq.replace(c.source_name, f"v{idx}") + + for match in re.finditer(r"(MALI_CONFIG[a-zA-Z0-9_]+)($|[^a-zA-Z0-9_])", eq): + config = match.group(1) + + idx = len(vals) + pan_config = config.replace("MALI", "PAN_PERF_DERIVED") + r = f"const double v{idx} = configs[{pan_config}];" + vals[config] = (idx, r) + + eq = eq.replace(config, f"v{idx}") + + defs = [r for _, r in vals.values()] + body = "\n".join(defs) + "\n" + body += f"return {eq};" + + return body + + @property + def versioned_name(self): + assert (self.version != -1 and "should not emit non versioned") + return self.fname + f"_v{self.version}" + + @property + def decl(self): + decl = "double " + self.versioned_name + \ + "(const struct pan_perf *perf, const double *configs, uint8_t block)" + return decl + + +def generate_equations(prods, c): + for prod in prods: + + all_raw_counters = [] + for cat in prod.categories: + for counter in cat.counters: + if counter.source_name: + all_raw_counters.append(counter) + + for cat in prod.categories: + for counter in cat.counters: + if not counter.equation: + continue + + eq = EquationImpl.get(counter, all_raw_counters) + counter.equation_impl = eq + + for impls in EquationImpl.impls.values(): + for impl in impls.values(): + c.write("static " + impl.decl + "{") + c.indent(TAB_SIZE) + c.write(impl.body) + c.outdent(TAB_SIZE) + c.write("}\n") + + def main(): parser = argparse.ArgumentParser() parser.add_argument("--header", help="Header file to write", required=True) @@ -82,17 +214,17 @@ def main(): for xml_file in args.xml_files: prods.append(Product(xml_file)) - tab_size = 3 + tab_size = TAB_SIZE copyright = textwrap.dedent("""\ /* Autogenerated file, DO NOT EDIT manually! 
generated by {} * - * Copyright © 2021 Arm Limited - * Copyright © 2021 Collabora Ltd. + * Copyright © {year} Arm Limited + * Copyright © {year} Collabora Ltd. * SPDX-License-Identifier: MIT */ - """).format(os.path.basename(__file__)) + """).format(os.path.basename(__file__), year=datetime.datetime.now().year) h.write(copyright) h.write(textwrap.dedent("""\ @@ -110,6 +242,79 @@ def main(): #include """)) + c.write(textwrap.dedent(""" + static inline int max2(int a, int b) { + return MAX2(a, b); + } + + static inline int max3(int a, int b, int c) { + return max2(max2(a, b), c); + } + + static inline int max4(int a, int b, int c, int d) { + return max2(max3(a, b, c), d); + } + + static inline int max5(int a, int b, int c, int d, int e) { + return max2(max4(a, b, c, d), e); + } + + static inline int max6(int a, int b, int c, int d, int e, int f) { + return max2(max5(a, b, c, d, e), f); + } + + static inline int max7(int a, int b, int c, int d, int e, int f, int g) { + return max2(max6(a, b, c, d, e, f), g); + } + + static inline int max8(int a, int b, int c, int d, int e, int f, int g, int h) { + return max2(max7(a, b, c, d, e, f, g), h); + } + + static inline int max9(int a, int b, int c, int d, int e, int f, int g, int h, int i) { + return max2(max8(a, b, c, d, e, f, g, h), i); + } + + static inline int min2(int a, int b) { + return MIN2(a, b); + } + + static inline int min3(int a, int b, int c) { + return min2(min2(a, b), c); + } + + static inline int min4(int a, int b, int c, int d) { + return min2(min3(a, b, c), d); + } + + static inline int min5(int a, int b, int c, int d, int e) { + return min2(min4(a, b, c, d), e); + } + + static inline int min6(int a, int b, int c, int d, int e, int f) { + return min2(min5(a, b, c, d, e), f); + } + + static inline int min7(int a, int b, int c, int d, int e, int f, int g) { + return min2(min6(a, b, c, d, e, f), g); + } + + static inline int min8(int a, int b, int c, int d, int e, int f, int g, int h) { + return min2(min7(a, 
b, c, d, e, f, g), h); + } + + static inline int min9(int a, int b, int c, int d, int e, int f, int g, int h, int i) { + return min2(min8(a, b, c, d, e, f, g, h), i); + } + + #define GET_MACRO(_1,_2,_3,_4,_5,_6,_7,_8,_9,name,...) name + #define min(...) GET_MACRO(__VA_ARGS__, min9, min8, min7, min6, min5, min4, min3, min2)(__VA_ARGS__) + #define max(...) GET_MACRO(__VA_ARGS__, max9, max8, max7, max6, max5, max4, max3, max2)(__VA_ARGS__) + + """)) + + generate_equations(prods, c) + for prod in prods: c.write(textwrap.dedent(""" static void UNUSED @@ -119,7 +324,7 @@ def main(): c.indent(tab_size) n_categories = len(prod.categories) - c.write("STATIC_ASSERT(%u <= PAN_PERF_MAX_CATEGORIES);" % n_categories) + c.write("STATIC_ASSERT(%u <= PAN_PERF_COUNTER_CAT_MAX);" % n_categories) n_counters = 0 for category in prod.categories: category_counters_count = len(category.counters) @@ -145,7 +350,7 @@ def main(): for i in range(0, len(prod.categories)): category = prod.categories[i] - c.write("{") + c.write("[%s] = {" % CATEGORY_IDX_REMAP[category.name]) c.indent(tab_size) c.write(".name = \"%s\"," % (category.name)) c.write(".n_counters = %u," % (len(category.counters))) @@ -164,7 +369,11 @@ def main(): c.write(".symbol_name = \"%s\"," % (counter.underscore_name)) c.write(".units = PAN_PERF_COUNTER_UNITS_%s," % (counter.units.upper())) c.write(".offset = %u," % (counter.offset)) - c.write(".category_index = %u," % i) + c.write(".category = %s," % CATEGORY_IDX_REMAP[category.name]) + if counter.equation: + c.write(f".derived = {counter.equation_impl.versioned_name},") + else: + c.write(".derived = NULL,") c.outdent(tab_size) c.write("}, // counter") @@ -188,7 +397,7 @@ def main(): c.write("\nconst struct pan_perf_config * pan_perf_configs[] = {") c.indent(tab_size) for prod in prods: - c.write("&pan_perf_config_%s," % prod.id) + c.write("&pan_perf_config_%s," % prod.id) c.outdent(tab_size) c.write("};") diff --git a/src/panfrost/perf/pan_gen_perf_defs.py 
b/src/panfrost/perf/pan_gen_perf_defs.py new file mode 100644 index 00000000000..fadb46e1c66 --- /dev/null +++ b/src/panfrost/perf/pan_gen_perf_defs.py @@ -0,0 +1,286 @@ +# Copyright (c) 2026 Arm Ltd. +# SPDX-License-Identifier: MIT + +from argparse import ArgumentParser +from pathlib import Path +from dataclasses import dataclass +import datetime +import subprocess +import xml.etree.ElementTree as et +import re + +COUNTERINFO_PATH = "./specification/database/counterinfo" +HARDWARE_LAYOUT_PATH = "./specification/database/hardwarelayout" + +HW_LAYOUT_LUT: dict[str, "HardwareLayout"] = {} + +OUTPUT_COPYRIGHT = """ + +""" + + +def get_revision(path): + cmd = ["git", "rev-parse", "HEAD"] + res = subprocess.run(cmd, capture_output=True, cwd=path.as_posix()) + if res.returncode != 0: + return None + else: + return res.stdout.decode().strip() + + +def map_nn(v, f): + return None if v is None else f(v) + + +def get_elem_text(xml, name): + e = xml.find(name) + if e is not None: + return e.text + else: + return None + + +@dataclass(frozen=True) +class CounterHwLocation: + block: str + counter_index: int + + +@dataclass +class HardwareLayout: + gpu_name: str + # map source name to (block index, counter index) + locations: dict[str, CounterHwLocation] + + @staticmethod + def from_xml(xml: et.Element) -> "HardwareLayout": + gpu_name = xml.get("gpu") + assert gpu_name is not None + locations = {} + for cbe in xml.findall("CounterBlock"): + cb_name = cbe.get("type") + assert cb_name is not None + for counter in cbe.findall("Counter"): + source_name = counter.get("name") + counter_index = counter.get("index") + assert counter_index is not None + locations[source_name] = CounterHwLocation( + cb_name, int(counter_index)) + + return HardwareLayout(gpu_name=gpu_name, locations=locations) + + +def parse_hw_layout(path: Path): + xml = et.parse(path) + return HardwareLayout.from_xml(xml.getroot()) + + +def parse_supported_gpus(xml): + supported_list = xml.find("SupportedGPUs") + return 
[e.text for e in supported_list.findall("GPU")] + + +def group_from_filename(fname): + # This maps to the values of the "type" field in the CounterBlock xml blocks. + fname_to_dbkey = { + "GPUFrontEnd": "GPU Front-end", + "L2Cache": "Memory System", + "Tiler": "Tiler", + "ShaderCore": "Shader Core", + "Constants": "Constants", + "Content": "Content", + } + for name, key in fname_to_dbkey.items(): + if name in fname: + return key + assert False and "could not find group from filename" + + +@dataclass +class CounterInfo: + machine_name: str + supported_gpus: list[str] + group: str + equation: str = "" + source_name: str = "" + # Can be used as a fallback to find hw offsets if source_name isn't available. + source_alias_name: str = "" + human_name: str = "" + short_desc: str = "" + units: str = "" + + @staticmethod + def from_xml(xml, group): + machine_name = get_elem_text(xml, "MachineName") + assert machine_name is not None + supported = parse_supported_gpus(xml) + + desc_raw = get_elem_text(xml, "ShortDescription") or "" + desc_san = " ".join(map(str.strip, desc_raw.splitlines())).strip() + + return CounterInfo( + machine_name, + supported, + group, + equation=map_nn(get_elem_text(xml, "Equation"), str.strip) or "", + source_name=get_elem_text(xml, "SourceName") or "", + source_alias_name=get_elem_text(xml, "SourceAlias") or "", + human_name=get_elem_text(xml, "HumanName") or "", + short_desc=desc_san, + units=(get_elem_text(xml, "Units") or "").strip(), + ) + + def is_derived(self): + return not self.source_name + + def get_hw_offsets(self, gpu: str) -> CounterHwLocation: + assert self.source_name != "" + assert gpu in self.supported_gpus + locs = HW_LAYOUT_LUT[gpu].locations + if self.source_name in locs: + return locs[self.source_name] + else: + # If the normal source name doesn't work try the alias + # Needed for example for RT_RAY_BOX_ISSUED on G1 which is using the + # alias RT_BOX_ISSUE_CYCLES there. 
+ assert self.source_alias_name != "" + return locs[self.source_alias_name] + + def is_supported(self): + return "MALI_CONFIG_TIME_SPAN" not in self.equation + + +@dataclass +class ProductInfo: + product_id: str + database_key: str + + +def parse_counters(path: Path): + group = group_from_filename(path.name) + xml = et.parse(path) + return [CounterInfo.from_xml(e, group) for e in xml.findall("CounterInfo")] + + +def resolve_equation(eq: str, counters_gpu: list[CounterInfo]): + sorted_c = sorted(counters_gpu, key=lambda c: len(c.machine_name)) + max_len = max([len(c.machine_name) for c in sorted_c]) + + # This loop replaces variables which aren't hardware counters or config values + # until only hardware counters and config values remain. + # Iterate backwards from the largest to the smallest variable to make this work: + # eq = MaliMainQueueTask * MaliMainQueueTaskSize * MaliMainQueueTaskSize + + progress = True + while progress: + progress = False + for l in range(max_len, 0, -1): + for c in filter(lambda c: len(c.machine_name) == l, sorted_c): + if c.machine_name in eq: + if c.is_derived(): + repl = f"({c.equation})" + else: + assert c.source_name is not None + repl = f"({c.source_name})" + + eq = eq.replace(c.machine_name, repl) + progress = True + break + + # There was a change, need to restart because we might have added + # a variable with len(name) > l. 
+ if progress: + break + return eq + + +def counter_list_to_xml(counters: list[CounterInfo], gpu: str): + gpu_xml = gpu.replace("Mali-", "").replace("Mali", "").strip() + root = et.Element("metrics", attrib={"id": gpu_xml}) + + IGNORE_CATS = {"Constants", "Content"} + + cat_names = set([c.group for c in counters]) + categories = dict() + for c in sorted(cat_names): + if c in IGNORE_CATS: + continue + categories[c] = et.SubElement(root, "category", attrib={"name": c}) + + for counter in sorted(counters, key=lambda c: c.machine_name): + if not counter.is_supported(): + continue + + if counter.group in IGNORE_CATS: + continue + p = categories[counter.group] + + attrib = { + "name": counter.machine_name, + "title": counter.human_name, + "description": counter.short_desc, + "units": counter.units, + } + + if counter.is_derived(): + attrib["equation"] = resolve_equation(counter.equation, counters) + else: + attrib["counter"] = counter.source_name + attrib["offset"] = str(counter.get_hw_offsets(gpu).counter_index) + + et.SubElement(p, "event", attrib) + + return root + + +def main(): + p = ArgumentParser() + p.add_argument("lib_gpu_counters", type=Path, + help="Path to libGPUCounter source") + p.add_argument( + "--output-path", type=Path, default=Path(__file__).parent / "generated" + ) + args = p.parse_args() + + for f in (args.lib_gpu_counters / HARDWARE_LAYOUT_PATH).glob("*.xml"): + l = parse_hw_layout(f) + HW_LAYOUT_LUT[l.gpu_name] = l + + counters: list[CounterInfo] = [] + for f in (args.lib_gpu_counters / COUNTERINFO_PATH).glob("*.xml"): + counters += parse_counters(f) + + args.output_path.mkdir(exist_ok=True) + + # Generate one file for each GPU. 
+ all_gpus = set().union(*(c.supported_gpus for c in counters)) + for gpu in all_gpus: + gpu_counters = [c for c in counters if gpu in c.supported_gpus] + xml = counter_list_to_xml(gpu_counters, gpu) + et.indent(xml) + + fname = gpu.replace("Mali-", "").replace("Mali", "").strip() + ".xml" + year = datetime.datetime.now().year + rev = get_revision(args.lib_gpu_counters) + assert(rev is not None) + + with open(args.output_path / fname, "wb") as f: + f.write( + OUTPUT_COPYRIGHT.format( + year=year, rev=rev).encode(encoding="utf-8") + ) + f.write(et.tostring(xml, encoding="utf-8")) + f.write("\n".encode(encoding="utf-8")) + + +if __name__ == "__main__": + main() diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c index 604e0f75a26..66cd514bed4 100644 --- a/src/panfrost/perf/pan_perf.c +++ b/src/panfrost/perf/pan_perf.c @@ -1,5 +1,6 @@ /* * Copyright © 2021 Collabora, Ltd. + * Copyright © 2026 Arm Ltd. * SPDX-License-Identifier: MIT */ @@ -8,7 +9,6 @@ #include #include "util/macros.h" -#include "util/ralloc.h" #include "pan_perf.h" @@ -17,23 +17,29 @@ #include #include -#define PAN_COUNTERS_PER_CATEGORY 64 -#define PAN_SHADER_CORE_INDEX 3 - -uint32_t -pan_perf_counter_read(const struct pan_perf_counter *counter, - const struct pan_perf *perf) +int64_t +pan_perf_counter_read(const struct pan_perf *perf, + const struct pan_perf_counter *counter, uint8_t block) { - unsigned offset = perf->category_offset[counter->category_index]; - offset += counter->offset; - assert(offset < perf->n_counter_values); + if (counter->derived != NULL) + return counter->derived(perf, perf->derived_configs, block); + else + return pan_perf_counter_read_raw(perf, counter->category, counter->offset, block); +} - uint32_t ret = perf->counter_values[offset]; +int64_t +pan_perf_counter_read_block_sum(const struct pan_perf_counter *counter, + const struct pan_perf *perf) +{ + int64_t ret = pan_perf_counter_read(perf, counter, 0); - // If counter belongs to shader core, 
accumulate values for all other cores - if (counter->category_index == PAN_SHADER_CORE_INDEX) { - for (uint32_t core = 1; core < perf->core_id_range; ++core) { - ret += perf->counter_values[offset + PAN_COUNTERS_PER_CATEGORY * core]; + /* If counter belongs to shader core, sum values for all cores. */ + if (counter->category == PAN_PERF_COUNTER_CAT_SHADER) { + uint32_t n_cores = + perf->mem_layout.category[PAN_PERF_COUNTER_CAT_SHADER].n_blocks; + for (uint32_t core = 1; core < n_cores; ++core) { + ret += pan_perf_counter_read(perf, counter, core); + assert(ret >= 0 && "counter sum should not overflow"); } } @@ -54,16 +60,12 @@ pan_lookup_counters(const char *name) void pan_perf_init(struct pan_perf *perf, int fd) { - ASSERTED drmVersionPtr version = drmGetVersion(fd); - - /* We only support panfrost at the moment. */ - assert(version && !strcmp(version->name, "panfrost")); - - drmFreeVersion(version); - perf->dev = pan_kmod_dev_create(fd, 0, NULL); assert(perf->dev); + perf->session = pan_kmod_perf_create(perf->dev); + assert(perf->session); + struct pan_kmod_dev_props props = perf->dev->props; const struct pan_model *model = @@ -76,49 +78,59 @@ pan_perf_init(struct pan_perf *perf, int fd) if (perf->cfg == NULL) UNREACHABLE("Performance counters missing!"); - // Generally counter blocks are laid out in the following order: - // Job manager, tiler, one or more L2 caches, and one or more shader cores. 
- unsigned l2_slices = pan_query_l2_slices(&props); - pan_query_core_count(&props, &perf->core_id_range); + pan_kmod_perf_query_layout(perf->session, &perf->mem_layout); - uint32_t n_blocks = 2 + l2_slices + perf->core_id_range; - perf->n_counter_values = PAN_COUNTERS_PER_CATEGORY * n_blocks; - perf->counter_values = ralloc_array(perf, uint32_t, perf->n_counter_values); + unsigned unused; - /* Setup the layout */ - perf->category_offset[0] = PAN_COUNTERS_PER_CATEGORY * 0; - perf->category_offset[1] = PAN_COUNTERS_PER_CATEGORY * 1; - perf->category_offset[2] = PAN_COUNTERS_PER_CATEGORY * 2; - perf->category_offset[3] = PAN_COUNTERS_PER_CATEGORY * (2 + l2_slices); -} - -static int -pan_perf_query(struct pan_perf *perf, uint32_t enable) -{ - struct drm_panfrost_perfcnt_enable perfcnt_enable = {enable, 0}; - return pan_kmod_ioctl(perf->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_ENABLE, - &perfcnt_enable); + perf->derived_configs[PAN_PERF_DERIVED_CONFIG_SHADER_CORE_COUNT] = + pan_query_core_count(&props, &unused); + perf->derived_configs[PAN_PERF_DERIVED_CONFIG_L2_CACHE_COUNT] = + pan_query_l2_slices(&props); + perf->derived_configs[PAN_PERF_DERIVED_CONFIG_EXT_BUS_BYTE_SIZE] = + pan_query_bus_width(&props); } int pan_perf_enable(struct pan_perf *perf) { - return pan_perf_query(perf, 1 /* enable */); + return pan_kmod_perf_enable(perf->session); } int pan_perf_disable(struct pan_perf *perf) { - return pan_perf_query(perf, 0 /* disable */); + return pan_kmod_perf_disable(perf->session); +} + +void +pan_perf_finish(struct pan_perf *perf) +{ + pan_kmod_perf_destroy(perf->session); } int pan_perf_dump(struct pan_perf *perf) { - // Dump performance counter values to the memory buffer pointed to by - // counter_values - struct drm_panfrost_perfcnt_dump perfcnt_dump = { - (uint64_t)(uintptr_t)perf->counter_values}; - return pan_kmod_ioctl(perf->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_DUMP, - &perfcnt_dump); + return pan_kmod_perf_dump(perf->session); +} + +uint64_t 
+pan_perf_get_timestamp(const struct pan_perf *perf) +{ + return perf->session->data_ts; /* NOTE(review): presumably the timestamp of the latest dump - confirm in pan_kmod_perf */ +} + +bool +pan_perf_timestamp_supported(const struct pan_perf *perf) +{ + return perf->session->data_ts_supported; /* flag is read straight from the kmod perf session */ +} + +uint64_t +pan_perf_get_min_sampling_period(const struct pan_perf *perf) +{ + if (pan_arch(perf->dev->props.gpu_id) < 10) + return 1000000; /* NOTE(review): units look like nanoseconds (the panthor cfg entries use counter_period_ns: 500000) - confirm */ + else + return 500000; /* arch >= 10 gets the shorter minimum period */ } diff --git a/src/panfrost/perf/pan_perf.h b/src/panfrost/perf/pan_perf.h index f60012b7d28..3c8640789cc 100644 --- a/src/panfrost/perf/pan_perf.h +++ b/src/panfrost/perf/pan_perf.h @@ -8,18 +8,23 @@ #include +#include + #if defined(__cplusplus) extern "C" { #endif -#define PAN_PERF_MAX_CATEGORIES 4 -#define PAN_PERF_MAX_COUNTERS 64 +/* 128 hardware counters, but there can be more derived ones. */ +#define PAN_PERF_MAX_COUNTERS 190 -struct pan_kmod_dev; -struct pan_kmod_dev_props; -struct pan_model; -struct pan_perf_category; -struct pan_perf; +enum pan_perf_counter_categories { + PAN_PERF_COUNTER_CAT_FRONTEND, + PAN_PERF_COUNTER_CAT_TILER, + PAN_PERF_COUNTER_CAT_MEMSYS, + PAN_PERF_COUNTER_CAT_SHADER, + /* Must be last.
*/ + PAN_PERF_COUNTER_CAT_MAX, +}; enum pan_perf_counter_units { PAN_PERF_COUNTER_UNITS_CYCLES, @@ -37,8 +42,24 @@ enum pan_perf_counter_units { PAN_PERF_COUNTER_UNITS_BYTES, PAN_PERF_COUNTER_UNITS_PIXELS, PAN_PERF_COUNTER_UNITS_ISSUES, + PAN_PERF_COUNTER_UNITS_INTERRUPTS, + PAN_PERF_COUNTER_UNITS_PERCENT, + PAN_PERF_COUNTER_UNITS_TESTS, + PAN_PERF_COUNTER_UNITS_RAYS, + PAN_PERF_COUNTER_UNITS_NODES, + PAN_PERF_COUNTER_UNITS_BOXES, + PAN_PERF_COUNTER_UNITS_BYTES_PER_SECOND, }; +enum pan_perf_derived_config { + PAN_PERF_DERIVED_CONFIG_SHADER_CORE_COUNT, + PAN_PERF_DERIVED_CONFIG_EXT_BUS_BYTE_SIZE, + PAN_PERF_DERIVED_CONFIG_L2_CACHE_COUNT, + PAN_PERF_DERIVED_CONFIG_LAST = PAN_PERF_DERIVED_CONFIG_L2_CACHE_COUNT, +}; + +struct pan_perf; + struct pan_perf_counter { const char *name; const char *desc; @@ -46,7 +67,10 @@ struct pan_perf_counter { enum pan_perf_counter_units units; // Offset of this counter's value within the category uint32_t offset; - unsigned category_index; + enum pan_perf_counter_categories category; + + /* Optional, function to compute the derived counters value. 
*/ + double (*derived)(const struct pan_perf*, const double*, uint8_t); }; struct pan_perf_category { @@ -62,25 +86,45 @@ struct pan_perf_category { struct pan_perf_config { const char *name; - struct pan_perf_category categories[PAN_PERF_MAX_CATEGORIES]; + struct pan_perf_category categories[PAN_PERF_COUNTER_CAT_MAX]; uint32_t n_categories; }; struct pan_perf { struct pan_kmod_dev *dev; - unsigned core_id_range; + struct pan_kmod_perf_session *session; const struct pan_perf_config *cfg; - - // Memory where to dump counter values - uint32_t *counter_values; - uint32_t n_counter_values; - - /* Offsets of categories */ - unsigned category_offset[PAN_PERF_MAX_CATEGORIES]; + struct pan_kmod_perf_buffer_layout mem_layout; + double derived_configs[PAN_PERF_DERIVED_CONFIG_LAST + 1]; }; -uint32_t pan_perf_counter_read(const struct pan_perf_counter *counter, - const struct pan_perf *perf); +static inline +int64_t pan_perf_counter_read_raw(const struct pan_perf *perf, + enum pan_perf_counter_categories cat, + uint8_t counter_index, + uint8_t block) +{ + STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_FRONTEND == (int)PAN_PERF_COUNTER_CAT_FRONTEND); + STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_TILER == (int)PAN_PERF_COUNTER_CAT_TILER); + STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_MEMSYS == (int)PAN_PERF_COUNTER_CAT_MEMSYS); + STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_SHADER == (int)PAN_PERF_COUNTER_CAT_SHADER); + + assert(perf->session->data != NULL); + + const uint32_t offset = perf->mem_layout.category[cat].offset + + perf->mem_layout.block_stride * block + + perf->mem_layout.counter_stride * counter_index; + + uint8_t *val_ptr = ((uint8_t *)perf->session->data) + offset; + return pan_kmod_perf_load_counter(perf->session, val_ptr); +} + +int64_t pan_perf_counter_read(const struct pan_perf *perf, + const struct pan_perf_counter *counter, + uint8_t block); + +int64_t pan_perf_counter_read_block_sum(const struct pan_perf_counter *counter, + const struct pan_perf *perf); void pan_perf_init(struct pan_perf 
*perf, int fd); @@ -88,8 +132,16 @@ int pan_perf_enable(struct pan_perf *perf); int pan_perf_disable(struct pan_perf *perf); +void pan_perf_finish(struct pan_perf *perf); + int pan_perf_dump(struct pan_perf *perf); +uint64_t pan_perf_get_timestamp(const struct pan_perf *perf); + +bool pan_perf_timestamp_supported(const struct pan_perf *perf); + +uint64_t pan_perf_get_min_sampling_period(const struct pan_perf *perf); + #if defined(__cplusplus) } // extern "C" #endif diff --git a/src/panfrost/perf/quick.c b/src/panfrost/perf/quick.c index 1d72ca4928f..f221ec909d0 100644 --- a/src/panfrost/perf/quick.c +++ b/src/panfrost/perf/quick.c @@ -45,8 +45,8 @@ main(void) for (unsigned j = 0; j < cat->n_counters; ++j) { const struct pan_perf_counter *ctr = &cat->counters[j]; - uint32_t val = pan_perf_counter_read(ctr, perf); - printf("%s (%s): %u\n", ctr->name, ctr->symbol_name, val); + int64_t val = pan_perf_counter_read_block_sum(ctr, perf); + printf("%s (%s): %lld\n", ctr->name, ctr->symbol_name, (long long)val); /* int64_t is not 'long' on every ABI; cast for a portable format */ } printf("\n"); diff --git a/src/tool/pps/cfg/gpu.cfg b/src/tool/pps/cfg/gpu.cfg index 2a6578375d8..e33007cd030 100644 --- a/src/tool/pps/cfg/gpu.cfg +++ b/src/tool/pps/cfg/gpu.cfg @@ -21,6 +21,15 @@ data_sources { } } +data_sources { + config { + name: "gpu.counters.panthor" + gpu_counter_config { + counter_period_ns: 500000 + } + } +} + data_sources { config { name: "gpu.counters.v3d" diff --git a/src/tool/pps/cfg/system.cfg b/src/tool/pps/cfg/system.cfg index 2e6ce480bb2..eebc5b3b9e7 100644 --- a/src/tool/pps/cfg/system.cfg +++ b/src/tool/pps/cfg/system.cfg @@ -73,6 +73,15 @@ data_sources { } } +data_sources { + config { + name: "gpu.counters.panthor" + gpu_counter_config { + counter_period_ns: 500000 + } + } +} + data_sources { config { name: "gpu.counters.v3d" diff --git a/src/tool/pps/pps_counter.h b/src/tool/pps/pps_counter.h index 7b032be81cb..c8f57b9d4b9 100644 --- a/src/tool/pps/pps_counter.h +++ b/src/tool/pps/pps_counter.h @@ -40,6 +40,10 @@ class Counter Byte,
Hertz, None, + Primitive, + Instruction, + Pixel, + Fragment }; using Value = std::variant; diff --git a/src/tool/pps/pps_datasource.cc b/src/tool/pps/pps_datasource.cc index 168eda79c54..edceecd46d6 100644 --- a/src/tool/pps/pps_datasource.cc +++ b/src/tool/pps/pps_datasource.cc @@ -181,6 +181,18 @@ template void add_descriptors(GpuCounterDescript case Counter::Units::None: units = GpuCounterDescriptor::NONE; break; + case Counter::Units::Primitive: + units = GpuCounterDescriptor::PRIMITIVE; + break; + case Counter::Units::Instruction: + units = GpuCounterDescriptor::INSTRUCTION; + break; + case Counter::Units::Pixel: + units = GpuCounterDescriptor::PIXEL; + break; + case Counter::Units::Fragment: + units = GpuCounterDescriptor::FRAGMENT; + break; default: assert(false && "Missing counter units type!"); break; diff --git a/src/tool/pps/pps_driver.cc b/src/tool/pps/pps_driver.cc index 4eb3a17aa7c..5ef155633c2 100644 --- a/src/tool/pps/pps_driver.cc +++ b/src/tool/pps/pps_driver.cc @@ -49,6 +49,7 @@ std::unordered_map> create_supported_driver #ifdef PPS_PANFROST map.emplace("panfrost", std::make_unique()); + map.emplace("panthor", std::make_unique()); #endif // PPS_PANFROST #ifdef PPS_V3D