mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 02:38:04 +02:00
Merge branch 'mr/pan-derived-counters' into 'main'
pan: Derived counter support See merge request mesa/mesa!41030
This commit is contained in:
commit
c9b5c131f3
47 changed files with 5851 additions and 1780 deletions
|
|
@ -220,6 +220,16 @@ To run the producer, follow these two simple steps:
|
|||
|
||||
./build/pps-producer
|
||||
|
||||
Panthor
|
||||
^^^^^^^
|
||||
|
||||
The Panthor PPS driver uses stable IOCTLs and doesn't need any special privileges to enable. The
|
||||
data source can be configured by running the producer:
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
./build/pps-producer
|
||||
|
||||
V3D / V3DV
|
||||
^^^^^^^^^^
|
||||
|
||||
|
|
|
|||
|
|
@ -154,6 +154,9 @@ enum drm_panthor_ioctl_id {
|
|||
* This is useful for imported BOs.
|
||||
*/
|
||||
DRM_PANTHOR_BO_QUERY_INFO,
|
||||
|
||||
/** @DRM_PANTHOR_PERF_CONTROL: Control a performance counter session. */
|
||||
DRM_PANTHOR_PERF_CONTROL,
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -253,6 +256,9 @@ enum drm_panthor_dev_query_type {
|
|||
* @DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO: Query allowed group priorities information.
|
||||
*/
|
||||
DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO,
|
||||
|
||||
/** @DRM_PANTHOR_DEV_QUERY_PERF_INFO: Query performance counter interface information. */
|
||||
DRM_PANTHOR_DEV_QUERY_PERF_INFO,
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -445,6 +451,138 @@ struct drm_panthor_group_priorities_info {
|
|||
__u8 pad[3];
|
||||
};
|
||||
|
||||
/**
|
||||
* enum drm_panthor_perf_feat_flags - Performance counter configuration feature flags.
|
||||
*/
|
||||
enum drm_panthor_perf_feat_flags {
|
||||
/** @DRM_PANTHOR_PERF_BLOCK_STATES_SUPPORT: Coarse-grained block states are supported. */
|
||||
DRM_PANTHOR_PERF_BLOCK_STATES_SUPPORT = 1 << 0,
|
||||
};
|
||||
|
||||
/**
|
||||
* enum drm_panthor_perf_block_type - Performance counter supported block types.
|
||||
*/
|
||||
enum drm_panthor_perf_block_type {
|
||||
/** @DRM_PANTHOR_PERF_BLOCK_METADATA: Internal use only. */
|
||||
DRM_PANTHOR_PERF_BLOCK_METADATA = 0,
|
||||
|
||||
/** @DRM_PANTHOR_PERF_BLOCK_FW: The FW counter block. */
|
||||
DRM_PANTHOR_PERF_BLOCK_FW,
|
||||
|
||||
/** @DRM_PANTHOR_PERF_BLOCK_CSHW: The CSHW counter block. */
|
||||
DRM_PANTHOR_PERF_BLOCK_CSHW,
|
||||
|
||||
/** @DRM_PANTHOR_PERF_BLOCK_TILER: The tiler counter block. */
|
||||
DRM_PANTHOR_PERF_BLOCK_TILER,
|
||||
|
||||
/** @DRM_PANTHOR_PERF_BLOCK_MEMSYS: A memsys counter block. */
|
||||
DRM_PANTHOR_PERF_BLOCK_MEMSYS,
|
||||
|
||||
/** @DRM_PANTHOR_PERF_BLOCK_SHADER: A shader core counter block. */
|
||||
DRM_PANTHOR_PERF_BLOCK_SHADER,
|
||||
|
||||
/** @DRM_PANTHOR_PERF_BLOCK_FIRST: Internal use only. */
|
||||
DRM_PANTHOR_PERF_BLOCK_FIRST = DRM_PANTHOR_PERF_BLOCK_FW,
|
||||
|
||||
/** @DRM_PANTHOR_PERF_BLOCK_LAST: Internal use only. */
|
||||
DRM_PANTHOR_PERF_BLOCK_LAST = DRM_PANTHOR_PERF_BLOCK_SHADER,
|
||||
|
||||
/** @DRM_PANTHOR_PERF_BLOCK_MAX: Internal use only. */
|
||||
DRM_PANTHOR_PERF_BLOCK_MAX = DRM_PANTHOR_PERF_BLOCK_LAST + 1,
|
||||
};
|
||||
|
||||
/**
|
||||
* enum drm_panthor_perf_clock - Identifier of the clock used to produce the cycle count values
|
||||
* in a given block.
|
||||
*
|
||||
* Since the integrator has the choice of using one or more clocks, there may be some confusion
|
||||
* as to which blocks are counted by which clock values unless this information is explicitly
|
||||
* provided as part of every block sample. Not every single clock here can be used: in the simplest
|
||||
* case, all cycle counts will be associated with the top-level clock.
|
||||
*/
|
||||
enum drm_panthor_perf_clock {
|
||||
/** @DRM_PANTHOR_PERF_CLOCK_TOPLEVEL: Top-level CSF clock. */
|
||||
DRM_PANTHOR_PERF_CLOCK_TOPLEVEL,
|
||||
|
||||
/**
|
||||
* @DRM_PANTHOR_PERF_CLOCK_COREGROUP: Core group clock, responsible for the MMU, L2
|
||||
* caches and the tiler.
|
||||
*/
|
||||
DRM_PANTHOR_PERF_CLOCK_COREGROUP,
|
||||
|
||||
/** @DRM_PANTHOR_PERF_CLOCK_SHADER: Clock for the shader cores. */
|
||||
DRM_PANTHOR_PERF_CLOCK_SHADER,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_panthor_perf_info - Performance counter interface information
|
||||
*
|
||||
* Structure grouping all queryable information relating to the performance counter
|
||||
* interfaces.
|
||||
*/
|
||||
struct drm_panthor_perf_info {
|
||||
/**
|
||||
* @counters_per_block: The number of 8-byte counters available in a block.
|
||||
*/
|
||||
__u32 counters_per_block;
|
||||
|
||||
/**
|
||||
* @sample_header_size: The size of the header struct available at the beginning
|
||||
* of every sample.
|
||||
*/
|
||||
__u32 sample_header_size;
|
||||
|
||||
/**
|
||||
* @block_header_size: The size of the header struct inline with the counters for a
|
||||
* single block.
|
||||
*/
|
||||
__u32 block_header_size;
|
||||
|
||||
/**
|
||||
* @sample_size: The size of a fully annotated sample, starting with a sample header
|
||||
* of size @sample_header_size bytes, and all available blocks for the current
|
||||
* configuration, each comprised of @counters_per_block 64-bit counters and
|
||||
* a block header of @block_header_size bytes.
|
||||
*
|
||||
* The user must use this field to allocate size for the ring buffer. In
|
||||
* the case of new blocks being added, an old userspace can always use
|
||||
* this field and ignore any blocks it does not know about.
|
||||
*/
|
||||
__u32 sample_size;
|
||||
|
||||
/** @flags: Combination of drm_panthor_perf_feat_flags flags. */
|
||||
__u32 flags;
|
||||
|
||||
/**
|
||||
* @supported_clocks: Bitmask of the clocks supported by the GPU.
|
||||
*
|
||||
* Each bit represents a variant of the enum drm_panthor_perf_clock.
|
||||
*
|
||||
* For the same GPU, different implementers may have different clocks for the same hardware
|
||||
* block. At the moment, up to three clocks are supported, and any clocks that are present
|
||||
* will be reported here.
|
||||
*/
|
||||
__u32 supported_clocks;
|
||||
|
||||
/** @fw_blocks: Number of FW blocks available. */
|
||||
__u32 fw_blocks;
|
||||
|
||||
/** @cshw_blocks: Number of CSHW blocks available. */
|
||||
__u32 cshw_blocks;
|
||||
|
||||
/** @tiler_blocks: Number of tiler blocks available. */
|
||||
__u32 tiler_blocks;
|
||||
|
||||
/** @memsys_blocks: Number of memsys blocks available. */
|
||||
__u32 memsys_blocks;
|
||||
|
||||
/** @shader_blocks: Number of shader core blocks available. */
|
||||
__u32 shader_blocks;
|
||||
|
||||
/** @pad: MBZ. */
|
||||
__u32 pad;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY
|
||||
*/
|
||||
|
|
@ -1187,6 +1325,434 @@ struct drm_panthor_bo_query_info {
|
|||
__u32 pad;
|
||||
};
|
||||
|
||||
/**
|
||||
* DOC: Performance counter decoding in userspace.
|
||||
*
|
||||
* Each sample will be exposed to userspace in the following manner:
|
||||
*
|
||||
* +--------+--------+------------------------+--------+-------------------------+-----+
|
||||
* | Sample | Block | Block | Block | Block | ... |
|
||||
* | header | header | counters | header | counters | |
|
||||
* +--------+--------+------------------------+--------+-------------------------+-----+
|
||||
*
|
||||
* Each sample will start with a sample header of type @struct drm_panthor_perf_sample_header,
|
||||
* providing sample-wide information like the start and end timestamps, the counter set currently
|
||||
* configured, and any errors that may have occurred during sampling.
|
||||
*
|
||||
* After the fixed size header, the sample will consist of blocks of
|
||||
* @drm_panthor_perf_info::counters_per_block 64-bit counters, each prefaced with a
|
||||
* header of its own, indicating source block type, as well as the cycle count needed to normalize
|
||||
* cycle values within that block, and a clock source identifier.
|
||||
*/
|
||||
|
||||
/**
|
||||
* enum drm_panthor_perf_block_state - Bitmask of the power and execution states that an individual
|
||||
* hardware block went through in a sampling period.
|
||||
*
|
||||
* Because the sampling period is controlled from userspace, the block may undergo multiple
|
||||
* state transitions, so this must be interpreted as one or more such transitions occurring.
|
||||
*/
|
||||
enum drm_panthor_perf_block_state {
|
||||
/**
|
||||
* @DRM_PANTHOR_PERF_BLOCK_STATE_UNKNOWN: The state of this block was unknown during
|
||||
* the sampling period.
|
||||
*/
|
||||
DRM_PANTHOR_PERF_BLOCK_STATE_UNKNOWN = 0,
|
||||
|
||||
/**
|
||||
* @DRM_PANTHOR_PERF_BLOCK_STATE_ON: This block was powered on for some or all of
|
||||
* the sampling period.
|
||||
*/
|
||||
DRM_PANTHOR_PERF_BLOCK_STATE_ON = 1 << 0,
|
||||
|
||||
/**
|
||||
* @DRM_PANTHOR_PERF_BLOCK_STATE_OFF: This block was powered off for some or all of the
|
||||
* sampling period.
|
||||
*/
|
||||
DRM_PANTHOR_PERF_BLOCK_STATE_OFF = 1 << 1,
|
||||
|
||||
/**
|
||||
* @DRM_PANTHOR_PERF_BLOCK_STATE_AVAILABLE: This block was available for execution for
|
||||
* some or all of the sampling period.
|
||||
*/
|
||||
DRM_PANTHOR_PERF_BLOCK_STATE_AVAILABLE = 1 << 2,
|
||||
/**
|
||||
* @DRM_PANTHOR_PERF_BLOCK_STATE_UNAVAILABLE: This block was unavailable for execution for
|
||||
* some or all of the sampling period.
|
||||
*/
|
||||
DRM_PANTHOR_PERF_BLOCK_STATE_UNAVAILABLE = 1 << 3,
|
||||
|
||||
/**
|
||||
* @DRM_PANTHOR_PERF_BLOCK_STATE_NORMAL: This block was executing in normal mode
|
||||
* for some or all of the sampling period.
|
||||
*/
|
||||
DRM_PANTHOR_PERF_BLOCK_STATE_NORMAL = 1 << 4,
|
||||
|
||||
/**
|
||||
* @DRM_PANTHOR_PERF_BLOCK_STATE_PROTECTED: This block was executing in protected mode
|
||||
* for some or all of the sampling period.
|
||||
*/
|
||||
DRM_PANTHOR_PERF_BLOCK_STATE_PROTECTED = 1 << 5,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_panthor_perf_block_header - Header present before every block in the
|
||||
* sample ringbuffer.
|
||||
*/
|
||||
struct drm_panthor_perf_block_header {
|
||||
/** @block_type: Type of the block. */
|
||||
__u8 block_type;
|
||||
|
||||
/** @block_idx: Block index. */
|
||||
__u8 block_idx;
|
||||
|
||||
/**
|
||||
* @block_states: Coarse-grained block transitions, bitmask of enum
|
||||
* drm_panthor_perf_block_state.
|
||||
*/
|
||||
__u8 block_states;
|
||||
|
||||
/**
|
||||
* @clock: Clock used to produce the cycle count for this block, taken from
|
||||
* enum drm_panthor_perf_clock. The cycle counts are stored in the sample header.
|
||||
*/
|
||||
__u8 clock;
|
||||
|
||||
/** @pad: MBZ. */
|
||||
__u8 pad[4];
|
||||
|
||||
/** @enable_mask: Bitmask of counters requested during the session setup. */
|
||||
__u64 enable_mask[2];
|
||||
};
|
||||
|
||||
/**
|
||||
* enum drm_panthor_perf_sample_flags - Sample-wide events that occurred over the sampling
|
||||
* period.
|
||||
*/
|
||||
enum drm_panthor_perf_sample_flags {
|
||||
/**
|
||||
* @DRM_PANTHOR_PERF_SAMPLE_OVERFLOW: This sample contains overflows due to the duration
|
||||
* of the sampling period.
|
||||
*/
|
||||
DRM_PANTHOR_PERF_SAMPLE_OVERFLOW = 1 << 0,
|
||||
|
||||
/**
|
||||
* @DRM_PANTHOR_PERF_SAMPLE_ERROR: This sample encountered an error condition during
|
||||
* the sample duration.
|
||||
*/
|
||||
DRM_PANTHOR_PERF_SAMPLE_ERROR = 1 << 1,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_panthor_perf_sample_header - Header present before every sample.
|
||||
*/
|
||||
struct drm_panthor_perf_sample_header {
|
||||
/**
|
||||
* @timestamp_start_ns: Earliest timestamp that values in this sample represent, in
|
||||
* nanoseconds. Derived from CLOCK_MONOTONIC_RAW.
|
||||
*/
|
||||
__u64 timestamp_start_ns;
|
||||
|
||||
/**
|
||||
* @timestamp_end_ns: Latest timestamp that values in this sample represent, in
|
||||
* nanoseconds. Derived from CLOCK_MONOTONIC_RAW.
|
||||
*/
|
||||
__u64 timestamp_end_ns;
|
||||
|
||||
/** @block_set: Set of performance counter blocks. */
|
||||
__u8 block_set;
|
||||
|
||||
/** @pad: MBZ. */
|
||||
__u8 pad[3];
|
||||
|
||||
/** @flags: Current sample flags, combination of drm_panthor_perf_sample_flags. */
|
||||
__u32 flags;
|
||||
|
||||
/**
|
||||
* @user_data: User data provided as part of the command that triggered this sample.
|
||||
*
|
||||
* - Automatic samples (periodic ones or those around non-counting periods or power state
|
||||
* transitions) will be tagged with the user_data provided as part of the
|
||||
* DRM_PANTHOR_PERF_COMMAND_START call.
|
||||
* - Manual samples will be tagged with the user_data provided with the
|
||||
* DRM_PANTHOR_PERF_COMMAND_SAMPLE call.
|
||||
* - A session's final automatic sample will be tagged with the user_data provided with the
|
||||
* DRM_PANTHOR_PERF_COMMAND_STOP call.
|
||||
*/
|
||||
__u64 user_data;
|
||||
|
||||
/**
|
||||
* @toplevel_clock_cycles: The number of cycles elapsed between
|
||||
* drm_panthor_perf_sample_header::timestamp_start_ns and
|
||||
* drm_panthor_perf_sample_header::timestamp_end_ns on the top-level clock if the
|
||||
* corresponding bit is set in drm_panthor_perf_info::supported_clocks.
|
||||
*/
|
||||
__u64 toplevel_clock_cycles;
|
||||
|
||||
/**
|
||||
* @coregroup_clock_cycles: The number of cycles elapsed between
|
||||
* drm_panthor_perf_sample_header::timestamp_start_ns and
|
||||
* drm_panthor_perf_sample_header::timestamp_end_ns on the coregroup clock if the
|
||||
* corresponding bit is set in drm_panthor_perf_info::supported_clocks.
|
||||
*/
|
||||
__u64 coregroup_clock_cycles;
|
||||
|
||||
/**
|
||||
* @shader_clock_cycles: The number of cycles elapsed between
|
||||
* drm_panthor_perf_sample_header::timestamp_start_ns and
|
||||
* drm_panthor_perf_sample_header::timestamp_end_ns on the shader core clock if the
|
||||
* corresponding bit is set in drm_panthor_perf_info::supported_clocks.
|
||||
*/
|
||||
__u64 shader_clock_cycles;
|
||||
};
|
||||
|
||||
/**
|
||||
* enum drm_panthor_perf_command - Command type passed to the DRM_PANTHOR_PERF_CONTROL
|
||||
* IOCTL.
|
||||
*/
|
||||
enum drm_panthor_perf_command {
|
||||
/** @DRM_PANTHOR_PERF_COMMAND_SETUP: Create a new performance counter sampling context. */
|
||||
DRM_PANTHOR_PERF_COMMAND_SETUP,
|
||||
|
||||
/** @DRM_PANTHOR_PERF_COMMAND_TEARDOWN: Teardown a performance counter sampling context. */
|
||||
DRM_PANTHOR_PERF_COMMAND_TEARDOWN,
|
||||
|
||||
/** @DRM_PANTHOR_PERF_COMMAND_START: Start a sampling session on the indicated context. */
|
||||
DRM_PANTHOR_PERF_COMMAND_START,
|
||||
|
||||
/** @DRM_PANTHOR_PERF_COMMAND_STOP: Stop the sampling session on the indicated context. */
|
||||
DRM_PANTHOR_PERF_COMMAND_STOP,
|
||||
|
||||
/**
|
||||
* @DRM_PANTHOR_PERF_COMMAND_SAMPLE: Request a manual sample on the indicated context.
|
||||
*
|
||||
* When the sampling session is configured with a non-zero sampling frequency, any
|
||||
* DRM_PANTHOR_PERF_CONTROL calls with this command will be ignored and return an
|
||||
* -EINVAL.
|
||||
*/
|
||||
DRM_PANTHOR_PERF_COMMAND_SAMPLE,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_panthor_perf_control - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL.
|
||||
*/
|
||||
struct drm_panthor_perf_control {
|
||||
/** @cmd: Command from enum drm_panthor_perf_command. */
|
||||
__u32 cmd;
|
||||
|
||||
/**
|
||||
* @handle: session handle.
|
||||
*
|
||||
* Returned by the DRM_PANTHOR_PERF_COMMAND_SETUP call.
|
||||
* It must be used in subsequent commands for the same context.
|
||||
*/
|
||||
__u32 handle;
|
||||
|
||||
/**
|
||||
* @size: size of the command structure.
|
||||
*
|
||||
* If the pointer is NULL, the size is updated by the driver to provide the size of the
|
||||
* output structure. If the pointer is not NULL, the driver will only copy min(size,
|
||||
* struct_size) to the pointer and update the size accordingly.
|
||||
*/
|
||||
__u64 size;
|
||||
|
||||
/**
|
||||
* @pointer: user pointer to a command type struct, such as
|
||||
* @struct drm_panthor_perf_cmd_start.
|
||||
*/
|
||||
__u64 pointer;
|
||||
};
|
||||
|
||||
/**
|
||||
* enum drm_panthor_perf_counter_set - The counter set to be requested from the hardware.
|
||||
*
|
||||
* The hardware supports a single performance counter set at a time, so requesting any set other
|
||||
* than the primary may fail if another process is sampling at the same time.
|
||||
*
|
||||
* If in doubt, the primary counter set has the most commonly used counters and requires no
|
||||
* additional permissions to open.
|
||||
*/
|
||||
enum drm_panthor_perf_counter_set {
|
||||
/**
|
||||
* @DRM_PANTHOR_PERF_SET_PRIMARY: The default set configured on the hardware.
|
||||
*
|
||||
* This is the only set for which all counters in all blocks are defined.
|
||||
*/
|
||||
DRM_PANTHOR_PERF_SET_PRIMARY,
|
||||
|
||||
/**
|
||||
* @DRM_PANTHOR_PERF_SET_SECONDARY: The secondary performance counter set.
|
||||
*
|
||||
* Some blocks may not have any defined counters for this set, and the block will
|
||||
* have the UNAVAILABLE block state permanently set in the block header.
|
||||
*
|
||||
* Accessing this set requires the calling process to have the CAP_PERFMON capability.
|
||||
*/
|
||||
DRM_PANTHOR_PERF_SET_SECONDARY,
|
||||
|
||||
/**
|
||||
* @DRM_PANTHOR_PERF_SET_TERTIARY: The tertiary performance counter set.
|
||||
*
|
||||
* Some blocks may not have any defined counters for this set, and the block will have
|
||||
* the UNAVAILABLE block state permanently set in the block header. Note that the
|
||||
* tertiary set has the fewest defined counter blocks.
|
||||
*
|
||||
* Accessing this set requires the calling process to have the CAP_PERFMON capability.
|
||||
*/
|
||||
DRM_PANTHOR_PERF_SET_TERTIARY,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_panthor_perf_ringbuf_control - Struct used to map in the ring buffer control indices
|
||||
* into memory shared between user and kernel.
|
||||
*
|
||||
*/
|
||||
struct drm_panthor_perf_ringbuf_control {
|
||||
/**
|
||||
* @extract_idx: The index of the latest sample that was processed by userspace. Only
|
||||
* modifiable by userspace.
|
||||
*/
|
||||
__u64 extract_idx;
|
||||
|
||||
/**
|
||||
* @insert_idx: The index of the latest sample emitted by the kernel. Only modifiable by
|
||||
* the kernel.
|
||||
*/
|
||||
__u64 insert_idx;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_panthor_perf_cmd_setup - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL
|
||||
* when the DRM_PANTHOR_PERF_COMMAND_SETUP command is specified.
|
||||
*/
|
||||
struct drm_panthor_perf_cmd_setup {
|
||||
/**
|
||||
* @block_set: Set of performance counter blocks, member of
|
||||
* enum drm_panthor_perf_counter_set.
|
||||
*
|
||||
* This is a global configuration and only one set can be active at a time. If
|
||||
* another client has already requested a counter set, any further requests
|
||||
* for a different counter set will fail and return an -EBUSY.
|
||||
*
|
||||
* If the requested set does not exist, the request will fail and return an -EINVAL.
|
||||
*
|
||||
* Some sets have additional requirements to be enabled, and the setup request will
|
||||
* fail with an -EACCES if these requirements are not satisfied.
|
||||
*/
|
||||
__u8 block_set;
|
||||
|
||||
/** @pad: MBZ. */
|
||||
__u8 pad[7];
|
||||
|
||||
/** @fd: eventfd for signalling the availability of a new sample. */
|
||||
__u32 fd;
|
||||
|
||||
/** @ringbuf_handle: Handle to the BO to write perf counter sample to. */
|
||||
__u32 ringbuf_handle;
|
||||
|
||||
/**
|
||||
* @control_handle: Handle to the BO containing a contiguous 16 byte range, used for the
|
||||
* insert and extract indices for the ringbuffer.
|
||||
*/
|
||||
__u32 control_handle;
|
||||
|
||||
/**
|
||||
* @sample_slots: The number of slots available in the userspace-provided BO. Must be
|
||||
* a power of 2.
|
||||
*
|
||||
* If sample_slots * sample_size does not match the BO size, the setup request will fail.
|
||||
*/
|
||||
__u32 sample_slots;
|
||||
|
||||
/**
|
||||
* @control_offset: Offset into the control BO where the insert and extract indices are
|
||||
* located.
|
||||
*/
|
||||
__u64 control_offset;
|
||||
|
||||
/**
|
||||
* @sample_freq_ns: Period between automatic counter sample collection in nanoseconds. Zero
|
||||
* disables automatic collection and all collection must be done through explicit calls
|
||||
* to DRM_PANTHOR_PERF_CONTROL with the SAMPLE command. Non-zero values will disable manual counter sampling
|
||||
* via the DRM_PANTHOR_PERF_COMMAND_SAMPLE command.
|
||||
*
|
||||
* This disables software-triggered periodic sampling, but hardware will still trigger
|
||||
* automatic samples on certain events, including shader core power transitions, and
|
||||
* entries to and exits from non-counting periods. The final stop command will also
|
||||
* trigger a sample to ensure no data is lost.
|
||||
*/
|
||||
__u64 sample_freq_ns;
|
||||
|
||||
/**
|
||||
* @fw_enable_mask: Bitmask of counters to request from the FW counter block. Any bits
|
||||
* past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit 0
|
||||
* corresponds to counter 0.
|
||||
*/
|
||||
__u64 fw_enable_mask[2];
|
||||
|
||||
/**
|
||||
* @cshw_enable_mask: Bitmask of counters to request from the CSHW counter block. Any bits
|
||||
* past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit 0
|
||||
* corresponds to counter 0.
|
||||
*/
|
||||
__u64 cshw_enable_mask[2];
|
||||
|
||||
/**
|
||||
* @tiler_enable_mask: Bitmask of counters to request from the tiler counter block. Any
|
||||
* bits past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit
|
||||
* 0 corresponds to counter 0.
|
||||
*/
|
||||
__u64 tiler_enable_mask[2];
|
||||
|
||||
/**
|
||||
* @memsys_enable_mask: Bitmask of counters to request from the memsys counter blocks. Any
|
||||
* bits past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit 0
|
||||
* corresponds to counter 0.
|
||||
*/
|
||||
__u64 memsys_enable_mask[2];
|
||||
|
||||
/**
|
||||
* @shader_enable_mask: Bitmask of counters to request from the shader core counter blocks.
|
||||
* Any bits past the first drm_panthor_perf_info.counters_per_block bits will be ignored.
|
||||
* Bit 0 corresponds to counter 0.
|
||||
*/
|
||||
__u64 shader_enable_mask[2];
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_panthor_perf_cmd_start - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL
|
||||
* when the DRM_PANTHOR_PERF_COMMAND_START command is specified.
|
||||
*/
|
||||
struct drm_panthor_perf_cmd_start {
|
||||
/**
|
||||
* @user_data: User provided data that will be attached to automatic samples collected
|
||||
* until the next DRM_PANTHOR_PERF_COMMAND_STOP.
|
||||
*/
|
||||
__u64 user_data;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_panthor_perf_cmd_stop - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL
|
||||
* when the DRM_PANTHOR_PERF_COMMAND_STOP command is specified.
|
||||
*/
|
||||
struct drm_panthor_perf_cmd_stop {
|
||||
/**
|
||||
* @user_data: User provided data that will be attached to the automatic sample collected
|
||||
* at the end of this sampling session.
|
||||
*/
|
||||
__u64 user_data;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_panthor_perf_cmd_sample - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL
|
||||
* when the DRM_PANTHOR_PERF_COMMAND_SAMPLE command is specified.
|
||||
*/
|
||||
struct drm_panthor_perf_cmd_sample {
|
||||
/** @user_data: User provided data that will be attached to the sample. */
|
||||
__u64 user_data;
|
||||
};
|
||||
|
||||
/**
|
||||
* DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number
|
||||
* @__access: Access type. Must be R, W or WR.
|
||||
|
|
@ -1237,6 +1803,8 @@ enum {
|
|||
DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync),
|
||||
DRM_IOCTL_PANTHOR_BO_QUERY_INFO =
|
||||
DRM_IOCTL_PANTHOR(WR, BO_QUERY_INFO, bo_query_info),
|
||||
DRM_IOCTL_PANTHOR_PERF_CONTROL =
|
||||
DRM_IOCTL_PANTHOR(WR, PERF_CONTROL, perf_control)
|
||||
};
|
||||
|
||||
#if defined(__cplusplus)
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
pps_panfrost_sources = [
|
||||
'pan_pps_perf.cpp',
|
||||
'pan_pps_dev.cpp',
|
||||
'pan_pps_driver.cpp'
|
||||
]
|
||||
|
||||
|
|
|
|||
36
src/panfrost/ds/pan_pps_dev.cpp
Normal file
36
src/panfrost/ds/pan_pps_dev.cpp
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Copyright © 2021 Collabora, Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "pan_pps_perf.h"
|
||||
|
||||
#include <lib/kmod/pan_kmod.h>
|
||||
#include <perf/pan_perf.h>
|
||||
|
||||
#include <pps/pps.h>
|
||||
#include <util/ralloc.h>
|
||||
|
||||
namespace pps {
|
||||
PanfrostDevice::PanfrostDevice(int fd): fd(fd)
|
||||
{
|
||||
assert(fd >= 0);
|
||||
}
|
||||
|
||||
PanfrostDevice::~PanfrostDevice()
|
||||
{
|
||||
}
|
||||
|
||||
PanfrostDevice::PanfrostDevice(PanfrostDevice &&o): fd{o.fd}
|
||||
{
|
||||
o.fd = -1;
|
||||
}
|
||||
|
||||
PanfrostDevice &
|
||||
PanfrostDevice::operator=(PanfrostDevice &&o)
|
||||
{
|
||||
std::swap(fd, o.fd);
|
||||
return *this;
|
||||
}
|
||||
|
||||
} // namespace pps
|
||||
|
|
@ -33,46 +33,13 @@ PanfrostDriver::~PanfrostDriver()
|
|||
uint64_t
|
||||
PanfrostDriver::get_min_sampling_period_ns()
|
||||
{
|
||||
return 1000000;
|
||||
return perf->get_min_sampling_period_ns();
|
||||
}
|
||||
|
||||
std::pair<std::vector<CounterGroup>, std::vector<Counter>>
|
||||
PanfrostDriver::create_available_counters(const PanfrostPerf &perf)
|
||||
{
|
||||
std::pair<std::vector<CounterGroup>, std::vector<Counter>> ret;
|
||||
auto &[groups, counters] = ret;
|
||||
|
||||
size_t cid = 0;
|
||||
|
||||
for (uint32_t gid = 0; gid < perf.perf->cfg->n_categories; ++gid) {
|
||||
const auto &category = perf.perf->cfg->categories[gid];
|
||||
CounterGroup group = {};
|
||||
group.id = gid;
|
||||
group.name = category.name;
|
||||
|
||||
for (size_t id = 0; id < category.n_counters; ++id) {
|
||||
Counter counter = {};
|
||||
counter.id = cid;
|
||||
counter.group = gid;
|
||||
|
||||
counter.name = category.counters[id].name;
|
||||
|
||||
counter.set_getter([=](const Counter &c, const Driver &d) {
|
||||
auto &pan_driver = PanfrostDriver::into(d);
|
||||
struct pan_perf *perf = pan_driver.perf->perf;
|
||||
const auto counter = &perf->cfg->categories[gid].counters[id];
|
||||
return int64_t(pan_perf_counter_read(counter, perf));
|
||||
});
|
||||
|
||||
group.counters.push_back(cid++);
|
||||
|
||||
counters.emplace_back(counter);
|
||||
}
|
||||
|
||||
groups.push_back(group);
|
||||
}
|
||||
|
||||
return ret;
|
||||
return perf.create_available_counters();
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
@ -81,9 +48,12 @@ PanfrostDriver::init_perfcnt()
|
|||
if (!dev) {
|
||||
dev = std::make_unique<PanfrostDevice>(drm_device.fd);
|
||||
}
|
||||
|
||||
if (!perf) {
|
||||
perf = std::make_unique<PanfrostPerf>(*dev);
|
||||
}
|
||||
|
||||
perf->init_perfcnt(drm_device.fd);
|
||||
if (groups.empty() && counters.empty()) {
|
||||
std::tie(groups, counters) = create_available_counters(*perf);
|
||||
}
|
||||
|
|
@ -106,9 +76,9 @@ PanfrostDriver::enable_all_counters()
|
|||
}
|
||||
|
||||
void
|
||||
PanfrostDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */)
|
||||
PanfrostDriver::enable_perfcnt(const uint64_t sampling_period_ns)
|
||||
{
|
||||
auto res = perf->enable();
|
||||
auto res = perf->enable_perfcnt(sampling_period_ns);
|
||||
if (!check(res, "Failed to enable performance counters")) {
|
||||
if (res == -ENOSYS) {
|
||||
PERFETTO_FATAL(
|
||||
|
|
@ -121,10 +91,8 @@ PanfrostDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */)
|
|||
bool
|
||||
PanfrostDriver::dump_perfcnt()
|
||||
{
|
||||
last_dump_ts = perfetto::base::GetBootTimeNs().count();
|
||||
|
||||
// Dump performance counters to buffer
|
||||
if (!check(perf->dump(), "Failed to dump performance counters")) {
|
||||
if (!check(perf->dump_perfcnt(), "Failed to dump performance counters")) {
|
||||
PERFETTO_ELOG("Skipping sample");
|
||||
return false;
|
||||
}
|
||||
|
|
@ -135,15 +103,13 @@ PanfrostDriver::dump_perfcnt()
|
|||
uint64_t
|
||||
PanfrostDriver::next()
|
||||
{
|
||||
auto ret = last_dump_ts;
|
||||
last_dump_ts = 0;
|
||||
return ret;
|
||||
return perf->next();
|
||||
}
|
||||
|
||||
void
|
||||
PanfrostDriver::disable_perfcnt()
|
||||
{
|
||||
perf->disable();
|
||||
perf->disable_perfcnt();
|
||||
perf.reset();
|
||||
dev.reset();
|
||||
groups.clear();
|
||||
|
|
@ -154,20 +120,19 @@ PanfrostDriver::disable_perfcnt()
|
|||
uint32_t
|
||||
PanfrostDriver::gpu_clock_id() const
|
||||
{
|
||||
return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME;
|
||||
return perf->gpu_clock_id();
|
||||
}
|
||||
|
||||
uint64_t
|
||||
PanfrostDriver::gpu_timestamp() const
|
||||
{
|
||||
return perfetto::base::GetBootTimeNs().count();
|
||||
return perf->gpu_timestamp();
|
||||
}
|
||||
|
||||
bool
|
||||
PanfrostDriver::cpu_gpu_timestamp(uint64_t &, uint64_t &) const
|
||||
PanfrostDriver::cpu_gpu_timestamp(uint64_t &cpu_timestamp, uint64_t &gpu_timestamp) const
|
||||
{
|
||||
/* Not supported */
|
||||
return false;
|
||||
return perf->cpu_gpu_timestamp(cpu_timestamp, gpu_timestamp);
|
||||
}
|
||||
|
||||
} // namespace pps
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
/*
|
||||
* Copyright © 2021 Collabora, Ltd.
|
||||
* Copyright © 2026 Arm, Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "pan_pps_perf.h"
|
||||
#include "pan_pps_driver.h"
|
||||
|
||||
#include <lib/kmod/pan_kmod.h>
|
||||
#include <perf/pan_perf.h>
|
||||
|
|
@ -12,45 +14,24 @@
|
|||
#include <util/ralloc.h>
|
||||
|
||||
namespace pps {
|
||||
PanfrostDevice::PanfrostDevice(int fd): fd(fd)
|
||||
{
|
||||
assert(fd >= 0);
|
||||
}
|
||||
|
||||
PanfrostDevice::~PanfrostDevice()
|
||||
{
|
||||
}
|
||||
|
||||
PanfrostDevice::PanfrostDevice(PanfrostDevice &&o): fd{o.fd}
|
||||
{
|
||||
o.fd = -1;
|
||||
}
|
||||
|
||||
PanfrostDevice &
|
||||
PanfrostDevice::operator=(PanfrostDevice &&o)
|
||||
{
|
||||
std::swap(fd, o.fd);
|
||||
return *this;
|
||||
}
|
||||
|
||||
PanfrostPerf::PanfrostPerf(const PanfrostDevice &dev)
|
||||
: perf{reinterpret_cast<struct pan_perf *>(
|
||||
rzalloc(nullptr, struct pan_perf))}
|
||||
{
|
||||
perf = reinterpret_cast<struct pan_perf *>(
|
||||
rzalloc(nullptr, struct pan_perf));
|
||||
assert(perf);
|
||||
assert(dev.fd >= 0);
|
||||
pan_perf_init(perf, dev.fd);
|
||||
}
|
||||
|
||||
PanfrostPerf::~PanfrostPerf()
|
||||
{
|
||||
if (perf) {
|
||||
pan_perf_disable(perf);
|
||||
pan_perf_finish(perf);
|
||||
ralloc_free(perf);
|
||||
}
|
||||
}
|
||||
|
||||
PanfrostPerf::PanfrostPerf(PanfrostPerf &&o): perf{o.perf}
|
||||
PanfrostPerf::PanfrostPerf(PanfrostPerf &&o): perf(o.perf)
|
||||
{
|
||||
o.perf = nullptr;
|
||||
}
|
||||
|
|
@ -62,25 +43,181 @@ PanfrostPerf::operator=(PanfrostPerf &&o)
|
|||
return *this;
|
||||
}
|
||||
|
||||
bool
|
||||
PanfrostPerf::init_perfcnt(int fd)
|
||||
{
|
||||
pan_perf_init(perf, fd);
|
||||
|
||||
return perf != NULL;
|
||||
}
|
||||
|
||||
int
|
||||
PanfrostPerf::enable() const
|
||||
PanfrostPerf::enable_perfcnt(uint64_t /* sampling_period_ns */)
|
||||
{
|
||||
assert(perf);
|
||||
return pan_perf_enable(perf);
|
||||
}
|
||||
|
||||
void
|
||||
PanfrostPerf::disable() const
|
||||
PanfrostPerf::disable_perfcnt()
|
||||
{
|
||||
assert(perf);
|
||||
pan_perf_disable(perf);
|
||||
}
|
||||
|
||||
int
|
||||
PanfrostPerf::dump() const
|
||||
bool
|
||||
PanfrostPerf::dump_perfcnt()
|
||||
{
|
||||
assert(perf);
|
||||
return pan_perf_dump(perf);
|
||||
|
||||
int ret = pan_perf_dump(perf);
|
||||
|
||||
if (pan_perf_timestamp_supported(perf))
|
||||
last_dump_ts = pan_perf_get_timestamp(perf);
|
||||
else
|
||||
last_dump_ts = perfetto::base::GetBootTimeNs().count();
|
||||
|
||||
return !!(ret >= 0);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
PanfrostPerf::get_min_sampling_period_ns()
|
||||
{
|
||||
assert(perf);
|
||||
return pan_perf_get_min_sampling_period(perf);
|
||||
}
|
||||
|
||||
void *
|
||||
PanfrostPerf::get_subinstance() {
|
||||
return perf;
|
||||
}
|
||||
|
||||
/* Expand a printf-style suffix template that consumes a single unsigned
 * index (for example " (core %u)") and return the result as an owned
 * string. The expansion goes through a 256-byte scratch buffer, so longer
 * output is truncated by snprintf.
 */
std::string
format_suffix(const char *fmt, uint8_t idx)
{
   char scratch[256];

   /* Reject templates that leave no room for the expanded index. */
   assert(strlen(fmt) < 200 && "fmt unreasonably long");

   std::snprintf(scratch, sizeof(scratch), fmt, idx);

   std::string expanded{scratch};
   return expanded;
}
|
||||
|
||||
const char *
|
||||
get_block_suffix(uint8_t category)
|
||||
{
|
||||
assert(category <= PAN_PERF_COUNTER_CAT_MAX);
|
||||
|
||||
switch (category) {
|
||||
case PAN_PERF_COUNTER_CAT_MEMSYS:
|
||||
return " (slice %u)";
|
||||
case PAN_PERF_COUNTER_CAT_SHADER:
|
||||
return " (core %u)";
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/* Translate a pan_perf counter unit into the matching pps Counter::Units
 * value. Units without a dedicated mapping are reported as Units::None.
 */
Counter::Units
convert_pan_units(enum pan_perf_counter_units unit)
{
   Counter::Units converted = Counter::Units::None;

   switch (unit) {
   case PAN_PERF_COUNTER_UNITS_PRIMITIVES:
      converted = Counter::Units::Primitive;
      break;
   case PAN_PERF_COUNTER_UNITS_INSTRUCTIONS:
      converted = Counter::Units::Instruction;
      break;
   case PAN_PERF_COUNTER_UNITS_BYTES:
      converted = Counter::Units::Byte;
      break;
   case PAN_PERF_COUNTER_UNITS_PIXELS:
      converted = Counter::Units::Pixel;
      break;
   default:
      break;
   }

   return converted;
}
|
||||
|
||||
std::pair<std::vector<CounterGroup>, std::vector<Counter>>
|
||||
PanfrostPerf::create_available_counters() const
|
||||
{
|
||||
std::pair<std::vector<CounterGroup>, std::vector<Counter>> ret;
|
||||
auto &[groups, counters] = ret;
|
||||
|
||||
uint32_t global_counter_id = 0;
|
||||
|
||||
const struct pan_perf_category *category = NULL;
|
||||
for (uint32_t cat_idx = 0; cat_idx < perf->cfg->n_categories; ++cat_idx) {
|
||||
assert(cat_idx < PAN_PERF_COUNTER_CAT_MAX);
|
||||
category = &perf->cfg->categories[cat_idx];
|
||||
|
||||
CounterGroup group = {};
|
||||
group.id = cat_idx;
|
||||
group.name = category->name;
|
||||
|
||||
uint32_t n_blocks = perf->mem_layout.category[cat_idx].n_blocks;
|
||||
for (uint32_t counter_idx = 0; counter_idx < category->n_counters;
|
||||
++counter_idx) {
|
||||
const struct pan_perf_counter *cinfo =
|
||||
&category->counters[counter_idx];
|
||||
|
||||
for (uint32_t block_idx = 0; block_idx < n_blocks; ++block_idx) {
|
||||
const char *suffix = get_block_suffix(cat_idx);
|
||||
const std::string name =
|
||||
cinfo->name + (suffix ? format_suffix(suffix, block_idx) : "");
|
||||
|
||||
Counter counter = {};
|
||||
counter.id = global_counter_id++;
|
||||
counter.name = name;
|
||||
counter.group = group.id;
|
||||
counter.units = convert_pan_units(cinfo->units);
|
||||
|
||||
counter.set_getter([=](const Counter &c, const Driver &d) {
|
||||
auto &pan_driver = PanfrostDriver::into(d);
|
||||
struct pan_perf *perf = static_cast<struct pan_perf *>(
|
||||
pan_driver.perf->get_subinstance());
|
||||
return pan_perf_counter_read(perf, cinfo, block_idx);
|
||||
});
|
||||
|
||||
group.counters.push_back(counter.id);
|
||||
counters.emplace_back(counter);
|
||||
}
|
||||
}
|
||||
|
||||
groups.push_back(group);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
PanfrostPerf::next()
|
||||
{
|
||||
auto ret = last_dump_ts;
|
||||
last_dump_ts = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
PanfrostPerf::gpu_clock_id() const
|
||||
{
|
||||
assert(perf);
|
||||
if (pan_perf_timestamp_supported(perf))
|
||||
return perfetto::protos::pbzero::BUILTIN_CLOCK_MONOTONIC_RAW;
|
||||
else
|
||||
return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
PanfrostPerf::gpu_timestamp() const
|
||||
{
|
||||
// TODO (panthor) This information is present in the dump
|
||||
return perfetto::base::GetBootTimeNs().count();
|
||||
}
|
||||
|
||||
bool
|
||||
PanfrostPerf::cpu_gpu_timestamp(uint64_t &, uint64_t &) const
|
||||
{
|
||||
// TODO (panthor) Start using the appropriate IOCTL to get these values
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace pps
|
||||
|
|
|
|||
|
|
@ -5,6 +5,12 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <pps/pps.h>
|
||||
#include <pps/pps_counter.h>
|
||||
|
||||
struct pan_perf;
|
||||
|
||||
namespace pps {
|
||||
|
|
@ -27,17 +33,27 @@ class PanfrostPerf {
|
|||
PanfrostPerf(const PanfrostDevice &dev);
|
||||
~PanfrostPerf();
|
||||
|
||||
PanfrostPerf(const PanfrostPerf &) = delete;
|
||||
PanfrostPerf &operator=(const PanfrostPerf &) = delete;
|
||||
PanfrostPerf(PanfrostPerf &&o);
|
||||
PanfrostPerf &operator=(PanfrostPerf &&o);
|
||||
|
||||
PanfrostPerf(PanfrostPerf &&);
|
||||
PanfrostPerf &operator=(PanfrostPerf &&);
|
||||
std::pair<std::vector<CounterGroup>, std::vector<Counter>>
|
||||
create_available_counters() const;
|
||||
|
||||
int enable() const;
|
||||
void disable() const;
|
||||
int dump() const;
|
||||
uint64_t get_min_sampling_period_ns();
|
||||
bool init_perfcnt(int fd);
|
||||
int enable_perfcnt(uint64_t sampling_period_ns);
|
||||
void disable_perfcnt();
|
||||
bool dump_perfcnt();
|
||||
uint64_t next();
|
||||
uint32_t gpu_clock_id() const;
|
||||
uint64_t gpu_timestamp() const;
|
||||
bool cpu_gpu_timestamp(uint64_t &cpu_timestamp,
|
||||
uint64_t &gpu_timestamp) const;
|
||||
void *get_subinstance();
|
||||
|
||||
private:
|
||||
struct pan_perf *perf = nullptr;
|
||||
uint64_t last_dump_ts = 0;
|
||||
};
|
||||
|
||||
} // namespace pps
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2023 Collabora, Ltd.
|
||||
* Copyright © 2026 Arm Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
|
|
@ -37,6 +38,7 @@
|
|||
#include "util/u_dynarray.h"
|
||||
|
||||
#include "kmod/panthor_kmod.h"
|
||||
#include "pan_props.h"
|
||||
#include "pan_trace.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
|
|
@ -197,6 +199,9 @@ struct pan_kmod_dev_props {
|
|||
/* Texture feature bits. */
|
||||
uint32_t texture_features[4];
|
||||
|
||||
/* L2 feature bits. */
|
||||
uint32_t l2_features;
|
||||
|
||||
/* Maximum number of threads per core. */
|
||||
uint32_t max_threads_per_core;
|
||||
|
||||
|
|
@ -384,6 +389,54 @@ struct pan_kmod_va_range {
|
|||
uint64_t size;
|
||||
};
|
||||
|
||||
struct pan_kmod_perf_session {
|
||||
/* Device this perf session was created from. */
|
||||
struct pan_kmod_dev *dev;
|
||||
|
||||
/* Sample data pointer. */
|
||||
void* data;
|
||||
|
||||
/* If pan_kmod_perf_session::data_ts is supported. */
|
||||
bool data_ts_supported;
|
||||
|
||||
/* The timestamp of the sample data. */
|
||||
uint64_t data_ts;
|
||||
};
|
||||
|
||||
enum pan_kmod_perf_category {
|
||||
PAN_KMOD_PERF_CAT_FRONTEND,
|
||||
PAN_KMOD_PERF_CAT_TILER,
|
||||
PAN_KMOD_PERF_CAT_MEMSYS,
|
||||
PAN_KMOD_PERF_CAT_SHADER,
|
||||
/* Must be last. */
|
||||
PAN_KMOD_PERF_CAT_COUNT,
|
||||
};
|
||||
|
||||
/* Describes the memory layout of a buffer containing performance counters.
|
||||
* The buffer is structured like this:
|
||||
* sample {
|
||||
* header
|
||||
* categories [ category {
|
||||
* blocks [ block {
|
||||
* header
|
||||
* samples
|
||||
* }]
|
||||
* }]
|
||||
* }
|
||||
*/
|
||||
struct pan_kmod_perf_buffer_layout {
|
||||
struct {
|
||||
/* Offset from the start of the buffer in bytes. */
|
||||
uint32_t offset;
|
||||
/* Number of blocks for this category. */
|
||||
uint8_t n_blocks;
|
||||
} category[PAN_KMOD_PERF_CAT_COUNT];
|
||||
|
||||
uint32_t block_stride;
|
||||
uint32_t counter_stride;
|
||||
uint32_t counters_per_category;
|
||||
};
|
||||
|
||||
/* KMD backend vtable.
|
||||
*
|
||||
* All methods described there are mandatory, unless explicitly flagged as
|
||||
|
|
@ -474,6 +527,25 @@ struct pan_kmod_ops {
|
|||
|
||||
/* Label the BO */
|
||||
void (*bo_set_label)(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const char *label);
|
||||
|
||||
/* Initialize a perf session. */
|
||||
struct pan_kmod_perf_session *(*perf_create)(struct pan_kmod_dev *dev);
|
||||
|
||||
/* Enable perf counters. */
|
||||
int (*perf_enable)(struct pan_kmod_perf_session *session);
|
||||
|
||||
/* Disable perf counters. */
|
||||
int (*perf_disable)(struct pan_kmod_perf_session *session);
|
||||
|
||||
/* Dump collected perf counters. */
|
||||
int (*perf_dump)(struct pan_kmod_perf_session *session);
|
||||
|
||||
/* Destroy a perf session. */
|
||||
void (*perf_destroy)(struct pan_kmod_perf_session *session);
|
||||
|
||||
/* Query the memory layout for a counter buffer. */
|
||||
void (*perf_query_layout)(const struct pan_kmod_perf_session *session,
|
||||
struct pan_kmod_perf_buffer_layout *layout);
|
||||
};
|
||||
|
||||
/* KMD information. */
|
||||
|
|
@ -783,6 +855,64 @@ pan_kmod_query_timestamp(const struct pan_kmod_dev *dev)
|
|||
return dev->ops->query_timestamp(dev);
|
||||
}
|
||||
|
||||
static inline struct pan_kmod_perf_session *
|
||||
pan_kmod_perf_create(struct pan_kmod_dev *dev)
|
||||
{
|
||||
return dev->ops->perf_create(dev);
|
||||
}
|
||||
|
||||
static inline int
|
||||
pan_kmod_perf_enable(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
return session->dev->ops->perf_enable(session);
|
||||
}
|
||||
|
||||
static inline int
|
||||
pan_kmod_perf_disable(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
return session->dev->ops->perf_disable(session);
|
||||
}
|
||||
|
||||
static inline int
|
||||
pan_kmod_perf_dump(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
return session->dev->ops->perf_dump(session);
|
||||
}
|
||||
|
||||
static inline void
|
||||
pan_kmod_perf_destroy(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
session->dev->ops->perf_destroy(session);
|
||||
}
|
||||
|
||||
static inline void
|
||||
pan_kmod_perf_query_layout(const struct pan_kmod_perf_session *session,
|
||||
struct pan_kmod_perf_buffer_layout *layout)
|
||||
{
|
||||
session->dev->ops->perf_query_layout(session, layout);
|
||||
}
|
||||
|
||||
/* Load a counter value from the given address. */
|
||||
static inline int64_t
|
||||
pan_kmod_perf_load_counter(const struct pan_kmod_perf_session *session,
|
||||
const void *ptr)
|
||||
{
|
||||
if (pan_arch(session->dev->props.gpu_id) < 10)
|
||||
return *((const uint32_t*)ptr);
|
||||
else {
|
||||
const uint64_t val = *((const uint64_t*)ptr);
|
||||
#ifndef NDEBUG
|
||||
/*
|
||||
* Even though the uAPI permits 64-bit unsigned counters, the counter
|
||||
* values realistically never exceed INT64_MAX.
|
||||
*/
|
||||
return (val > INT64_MAX) ? -EINVAL : val;
|
||||
#else
|
||||
return val;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -42,6 +42,10 @@ struct panfrost_kmod_bo {
|
|||
uint64_t offset;
|
||||
};
|
||||
|
||||
struct panfrost_kmod_perf_session {
|
||||
struct pan_kmod_perf_session base;
|
||||
};
|
||||
|
||||
/* Abstraction over the raw drm_panfrost_get_param ioctl for fetching
|
||||
* information about devices.
|
||||
*/
|
||||
|
|
@ -163,6 +167,8 @@ panfrost_dev_query_props(struct panfrost_kmod_dev *panfrost_dev)
|
|||
panfrost_query_raw(fd, DRM_PANFROST_PARAM_MEM_FEATURES, true, 0);
|
||||
props->mmu_features =
|
||||
panfrost_query_raw(fd, DRM_PANFROST_PARAM_MMU_FEATURES, true, 0);
|
||||
props->l2_features =
|
||||
panfrost_query_raw(fd, DRM_PANFROST_PARAM_L2_FEATURES, true, 0);
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(props->texture_features); i++) {
|
||||
props->texture_features[i] = panfrost_query_raw(
|
||||
|
|
@ -607,6 +613,106 @@ panfrost_kmod_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const c
|
|||
mesa_loge("DRM_IOCTL_PANFROST_SET_LABEL_BO failed (err=%d)", errno);
|
||||
}
|
||||
|
||||
static inline struct pan_kmod_perf_session *
|
||||
panfrost_kmod_perf_init(struct pan_kmod_dev *dev)
|
||||
{
|
||||
UNUSED struct panfrost_kmod_dev *panfrost_dev =
|
||||
container_of(dev, struct panfrost_kmod_dev, base);
|
||||
|
||||
struct panfrost_kmod_perf_session *sess =
|
||||
pan_kmod_dev_alloc(dev, sizeof(*sess));
|
||||
if (!sess) {
|
||||
mesa_loge("failed to allocate a panfrost_kmod_perf_session object");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sess->base.dev = dev;
|
||||
|
||||
struct pan_kmod_perf_buffer_layout layout;
|
||||
pan_kmod_perf_query_layout(&sess->base, &layout);
|
||||
|
||||
uint32_t n_counters = 0;
|
||||
for (uint32_t cat = 0; cat < PAN_KMOD_PERF_CAT_COUNT; ++cat)
|
||||
n_counters += layout.category[cat].n_blocks * layout.counters_per_category;
|
||||
|
||||
uint32_t* counter_values = pan_kmod_dev_alloc(dev, sizeof(uint32_t) * n_counters);
|
||||
sess->base.data = counter_values;
|
||||
sess->base.data_ts_supported = false;
|
||||
|
||||
mesa_logd("perf session created");
|
||||
|
||||
return &(sess->base);
|
||||
}
|
||||
|
||||
static int
|
||||
panfrost_kmod_perf_query(struct pan_kmod_perf_session *session, uint32_t enable)
|
||||
{
|
||||
struct drm_panfrost_perfcnt_enable perfcnt_enable = {enable, 0};
|
||||
return pan_kmod_ioctl(session->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_ENABLE,
|
||||
&perfcnt_enable);
|
||||
}
|
||||
|
||||
static int
|
||||
panfrost_kmod_perf_enable(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
return panfrost_kmod_perf_query(session, 1 /* enable */);
|
||||
}
|
||||
|
||||
static int
|
||||
panfrost_kmod_perf_disable(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
return panfrost_kmod_perf_query(session, 0 /* disable */);
|
||||
}
|
||||
|
||||
static int
|
||||
panfrost_kmod_perf_dump(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
struct drm_panfrost_perfcnt_dump perfcnt_dump = {
|
||||
(uint64_t)(uintptr_t)session->data};
|
||||
return pan_kmod_ioctl(session->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_DUMP,
|
||||
&perfcnt_dump);
|
||||
}
|
||||
|
||||
static void
|
||||
panfrost_kmod_perf_query_layout(const struct pan_kmod_perf_session *session,
|
||||
struct pan_kmod_perf_buffer_layout *layout)
|
||||
{
|
||||
/* Generally counter blocks are laid out in the following order:
|
||||
* Job manager, tiler, one or more L2 caches, and one or more shader cores.
|
||||
*/
|
||||
unsigned l2_slices = pan_query_l2_slices(&session->dev->props);
|
||||
unsigned core_id_range;
|
||||
pan_query_core_count(&session->dev->props, &core_id_range);
|
||||
|
||||
/* On all Bifrost architectures this is 64. */
|
||||
const unsigned counters_per_cat = 64;
|
||||
layout->counters_per_category = counters_per_cat;
|
||||
layout->counter_stride = sizeof(uint32_t);
|
||||
layout->block_stride = counters_per_cat * sizeof(uint32_t);
|
||||
|
||||
/* Setup the layout */
|
||||
layout->category[PAN_KMOD_PERF_CAT_FRONTEND].n_blocks = 1;
|
||||
layout->category[PAN_KMOD_PERF_CAT_TILER].n_blocks = 1;
|
||||
layout->category[PAN_KMOD_PERF_CAT_MEMSYS].n_blocks = l2_slices;
|
||||
layout->category[PAN_KMOD_PERF_CAT_SHADER].n_blocks = core_id_range;
|
||||
|
||||
layout->category[0].offset = 0;
|
||||
for (unsigned cat_idx = 1; cat_idx < PAN_KMOD_PERF_CAT_COUNT; ++cat_idx) {
|
||||
layout->category[cat_idx].offset =
|
||||
layout->category[cat_idx - 1].offset +
|
||||
layout->category[cat_idx - 1].n_blocks * counters_per_cat;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
panfrost_kmod_perf_destroy(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
if (session->data)
|
||||
pan_kmod_dev_free(session->dev, session->data);
|
||||
pan_kmod_dev_free(session->dev, session);
|
||||
mesa_logd("perf session destroyed");
|
||||
}
|
||||
|
||||
const struct pan_kmod_ops panfrost_kmod_ops = {
|
||||
.dev_create = panfrost_kmod_dev_create,
|
||||
.dev_destroy = panfrost_kmod_dev_destroy,
|
||||
|
|
@ -624,4 +730,10 @@ const struct pan_kmod_ops panfrost_kmod_ops = {
|
|||
.vm_bind = panfrost_kmod_vm_bind,
|
||||
.query_timestamp = panfrost_kmod_query_timestamp,
|
||||
.bo_set_label = panfrost_kmod_bo_label,
|
||||
.perf_create = panfrost_kmod_perf_init,
|
||||
.perf_enable = panfrost_kmod_perf_enable,
|
||||
.perf_disable = panfrost_kmod_perf_disable,
|
||||
.perf_dump = panfrost_kmod_perf_dump,
|
||||
.perf_query_layout = panfrost_kmod_perf_query_layout,
|
||||
.perf_destroy = panfrost_kmod_perf_destroy,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2023 Collabora, Ltd.
|
||||
* Copyright © 2026 Arm, Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
|
|
@ -7,6 +8,7 @@
|
|||
#include <fcntl.h>
|
||||
#include <string.h>
|
||||
#include <xf86drm.h>
|
||||
#include <sys/eventfd.h>
|
||||
|
||||
#include "util/hash_table.h"
|
||||
#include "util/libsync.h"
|
||||
|
|
@ -20,6 +22,8 @@
|
|||
#include "drm-uapi/dma-buf.h"
|
||||
#include "drm-uapi/panthor_drm.h"
|
||||
|
||||
#include "util/timespec.h"
|
||||
|
||||
#include "pan_kmod_backend.h"
|
||||
#include "pan_props.h"
|
||||
|
||||
|
|
@ -103,6 +107,43 @@ struct panthor_kmod_bo {
|
|||
} sync;
|
||||
};
|
||||
|
||||
struct panthor_kmod_perf_session {
|
||||
struct pan_kmod_perf_session base;
|
||||
|
||||
struct {
|
||||
int event;
|
||||
} fds;
|
||||
int session_handle;
|
||||
|
||||
struct {
|
||||
int ringbuf;
|
||||
int control;
|
||||
} bos;
|
||||
|
||||
struct {
|
||||
size_t sample;
|
||||
size_t block;
|
||||
size_t ringbuf;
|
||||
size_t control;
|
||||
size_t sample_header;
|
||||
size_t block_header;
|
||||
} sizes;
|
||||
|
||||
struct {
|
||||
size_t cshw_blocks;
|
||||
size_t tiler_blocks;
|
||||
size_t memsys_blocks;
|
||||
size_t shader_blocks;
|
||||
} config;
|
||||
|
||||
bool session_initialized;
|
||||
bool active;
|
||||
uint8_t set;
|
||||
uint64_t sample_idx;
|
||||
uint8_t *ringbuffer;
|
||||
struct drm_panthor_perf_ringbuf_control *ctrl;
|
||||
};
|
||||
|
||||
static uint32_t
|
||||
to_kmod_group_allow_priority_flags(uint32_t panthor_flags)
|
||||
{
|
||||
|
|
@ -160,6 +201,7 @@ panthor_dev_query_props(struct panthor_kmod_dev *panthor_dev)
|
|||
.tiler_features = panthor_dev->props.gpu.tiler_features,
|
||||
.mem_features = panthor_dev->props.gpu.mem_features,
|
||||
.mmu_features = panthor_dev->props.gpu.mmu_features,
|
||||
.l2_features = panthor_dev->props.gpu.l2_features,
|
||||
|
||||
/* This register does not exist because AFBC is no longer optional. */
|
||||
.afbc_features = 0,
|
||||
|
|
@ -1302,6 +1344,472 @@ panthor_kmod_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const ch
|
|||
mesa_loge("DRM_IOCTL_PANTHOR_BO_SET_LABEL failed (err=%d)", errno);
|
||||
}
|
||||
|
||||
/* ================ PERF COUNTERS ================= */
|
||||
|
||||
#define PANTHOR_SAMPLE_SLOTS (32)
|
||||
#define PANTHOR_POLL_TIMEOUT_SEC (10)
|
||||
#define PTR_TO_U64(ptr) ((uint64_t)(uintptr_t)(ptr))
|
||||
|
||||
struct panthor_perf_sample {
|
||||
struct drm_panthor_perf_sample_header sample_header;
|
||||
uint8_t bytes[];
|
||||
};
|
||||
|
||||
static int
|
||||
perf_cmd_setup(int fd, int eventfd, int ringbuf_handle, int control_handle, uint8_t set)
|
||||
{
|
||||
struct drm_panthor_perf_cmd_setup setup = {
|
||||
.fd = eventfd,
|
||||
.block_set = set,
|
||||
.ringbuf_handle = ringbuf_handle,
|
||||
.control_handle = control_handle,
|
||||
.sample_slots = PANTHOR_SAMPLE_SLOTS,
|
||||
.cshw_enable_mask = { UINT64_MAX, UINT64_MAX },
|
||||
.tiler_enable_mask = { UINT64_MAX, UINT64_MAX },
|
||||
.memsys_enable_mask = { UINT64_MAX, UINT64_MAX },
|
||||
.shader_enable_mask = { UINT64_MAX, UINT64_MAX },
|
||||
};
|
||||
|
||||
struct drm_panthor_perf_control ctrl = {
|
||||
.cmd = DRM_PANTHOR_PERF_COMMAND_SETUP,
|
||||
.size = sizeof(setup),
|
||||
.pointer = PTR_TO_U64(&setup),
|
||||
};
|
||||
|
||||
return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
|
||||
}
|
||||
|
||||
static int
|
||||
perf_cmd_start(int fd, int sid, uint64_t user_data)
|
||||
{
|
||||
struct drm_panthor_perf_cmd_start start = {
|
||||
.user_data = user_data,
|
||||
};
|
||||
|
||||
struct drm_panthor_perf_control ctrl = {
|
||||
.cmd = DRM_PANTHOR_PERF_COMMAND_START,
|
||||
.handle = sid,
|
||||
.size = sizeof(start),
|
||||
.pointer = PTR_TO_U64(&start),
|
||||
};
|
||||
|
||||
return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
|
||||
}
|
||||
|
||||
static int
|
||||
perf_cmd_stop(int fd, int sid, uint64_t user_data)
|
||||
{
|
||||
struct drm_panthor_perf_cmd_stop stop = {};
|
||||
|
||||
struct drm_panthor_perf_control ctrl = {
|
||||
.cmd = DRM_PANTHOR_PERF_COMMAND_STOP,
|
||||
.handle = sid,
|
||||
.size = sizeof(stop),
|
||||
.pointer = PTR_TO_U64(&stop),
|
||||
};
|
||||
|
||||
return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
|
||||
}
|
||||
|
||||
static int
|
||||
perf_cmd_sample(int fd, int sid, uint64_t user_data)
|
||||
{
|
||||
struct drm_panthor_perf_cmd_sample sample = {
|
||||
.user_data = user_data,
|
||||
};
|
||||
|
||||
struct drm_panthor_perf_control ctrl = {
|
||||
.cmd = DRM_PANTHOR_PERF_COMMAND_SAMPLE,
|
||||
.handle = sid,
|
||||
.size = sizeof(sample),
|
||||
.pointer = PTR_TO_U64(&sample),
|
||||
};
|
||||
|
||||
return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
|
||||
}
|
||||
|
||||
#define DUMMY_PTR ((uint8_t *)1)
|
||||
|
||||
static int
|
||||
perf_cmd_teardown(int fd, int sid)
|
||||
{
|
||||
struct drm_panthor_perf_control ctrl = {
|
||||
.cmd = DRM_PANTHOR_PERF_COMMAND_TEARDOWN,
|
||||
.handle = sid,
|
||||
};
|
||||
|
||||
int ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
unmap_and_teardown_bo(int fd, int handle, void *addr, size_t size)
|
||||
{
|
||||
if (addr)
|
||||
munmap(addr, size);
|
||||
|
||||
struct drm_gem_close ringbuf_close = {
|
||||
.handle = handle,
|
||||
};
|
||||
return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &ringbuf_close);
|
||||
}
|
||||
|
||||
static int
|
||||
create_and_map_bo(int fd, size_t size, int *handle, void **mapping)
|
||||
{
|
||||
struct drm_panthor_bo_create bo = {
|
||||
.size = size,
|
||||
};
|
||||
int ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_BO_CREATE, &bo);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
struct drm_panthor_bo_mmap_offset offset = {
|
||||
.handle = bo.handle,
|
||||
};
|
||||
ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET, &offset);
|
||||
if (ret)
|
||||
goto term_bo;
|
||||
|
||||
void *map = mmap(0, bo.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, (off_t)offset.offset);
|
||||
if (!map || map == MAP_FAILED) {
|
||||
ret = -EINVAL;
|
||||
goto term_bo;
|
||||
}
|
||||
|
||||
*handle = bo.handle;
|
||||
*mapping = map;
|
||||
|
||||
return 0;
|
||||
term_bo:
|
||||
return unmap_and_teardown_bo(fd, bo.handle, NULL, 0);
|
||||
}
|
||||
|
||||
static int
|
||||
poll_for_sample(int poll_fd)
|
||||
{
|
||||
int ret;
|
||||
eventfd_t tmp;
|
||||
struct pollfd pfd[1] = {
|
||||
{
|
||||
.fd = poll_fd,
|
||||
.events = POLLIN
|
||||
}
|
||||
};
|
||||
struct timespec timeout = {
|
||||
.tv_sec = PANTHOR_POLL_TIMEOUT_SEC,
|
||||
};
|
||||
struct timespec now, result, deadline;
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &now);
|
||||
timespec_add(&deadline, &now, &timeout);
|
||||
|
||||
do {
|
||||
clock_gettime(CLOCK_MONOTONIC, &now);
|
||||
timespec_sub_saturate(&result, &deadline, &now);
|
||||
ret = ppoll(pfd, 1, &result, NULL);
|
||||
} while (ret == -1 && errno == EINTR);
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
return eventfd_read(poll_fd, &tmp);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
read_extract_idx(struct panthor_kmod_perf_session *perf)
|
||||
{
|
||||
return p_atomic_read(&perf->ctrl->extract_idx);
|
||||
}
|
||||
|
||||
static void
|
||||
write_extract_idx(struct panthor_kmod_perf_session *perf, uint64_t idx)
|
||||
{
|
||||
p_atomic_set(&perf->ctrl->extract_idx, idx);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
read_insert_idx(struct panthor_kmod_perf_session *perf)
|
||||
{
|
||||
return p_atomic_read(&perf->ctrl->insert_idx);
|
||||
}
|
||||
|
||||
static inline struct pan_kmod_perf_session *
|
||||
panthor_kmod_perf_init(struct pan_kmod_dev *dev)
|
||||
{
|
||||
UNUSED struct panthor_kmod_dev *panthor_dev =
|
||||
container_of(dev, struct panthor_kmod_dev, base);
|
||||
|
||||
struct panthor_kmod_perf_session *sess =
|
||||
pan_kmod_dev_alloc(dev, sizeof(*sess));
|
||||
if (!sess) {
|
||||
mesa_loge("failed to allocate a panthor_kmod_perf_session object");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sess->base.dev = dev;
|
||||
|
||||
struct drm_panthor_gpu_info gpu_info = {};
|
||||
struct drm_panthor_dev_query query = {
|
||||
.type = DRM_PANTHOR_DEV_QUERY_GPU_INFO,
|
||||
.size = sizeof(gpu_info),
|
||||
.pointer = (uint64_t)(uintptr_t)&gpu_info,
|
||||
};
|
||||
|
||||
int ret = drmIoctl(dev->fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query);
|
||||
if (ret)
|
||||
goto free_perf;
|
||||
|
||||
struct drm_panthor_perf_info perf_info = {};
|
||||
|
||||
query = (struct drm_panthor_dev_query) {
|
||||
.type = DRM_PANTHOR_DEV_QUERY_PERF_INFO,
|
||||
.size = sizeof(perf_info),
|
||||
.pointer = (uint64_t)(uintptr_t)&perf_info,
|
||||
};
|
||||
|
||||
ret = drmIoctl(dev->fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query);
|
||||
if (ret)
|
||||
goto free_perf;
|
||||
|
||||
sess->fds.event = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE);
|
||||
if (!sess->fds.event)
|
||||
goto free_perf;
|
||||
|
||||
const size_t block_size = perf_info.counters_per_block * sizeof(uint64_t) +
|
||||
perf_info.block_header_size;
|
||||
const size_t sample_size = perf_info.sample_size;
|
||||
const size_t buffer_size = sample_size * PANTHOR_SAMPLE_SLOTS;
|
||||
|
||||
sess->sizes.block = block_size;
|
||||
sess->sizes.sample = sample_size;
|
||||
sess->sizes.ringbuf = buffer_size;
|
||||
sess->sizes.control = sizeof(*sess->ctrl);
|
||||
sess->sizes.sample_header = perf_info.sample_header_size;
|
||||
sess->sizes.block_header = perf_info.block_header_size;
|
||||
|
||||
if (sess->sizes.sample_header != sizeof(struct drm_panthor_perf_sample_header))
|
||||
fprintf(stderr, "panfrost perf sample header size mismatch!");
|
||||
|
||||
if (sess->sizes.block_header != sizeof(struct drm_panthor_perf_block_header))
|
||||
fprintf(stderr, "panfrost perf block header size mismatch!");
|
||||
|
||||
sess->config.cshw_blocks = perf_info.cshw_blocks;
|
||||
sess->config.tiler_blocks = perf_info.tiler_blocks;
|
||||
sess->config.memsys_blocks = perf_info.memsys_blocks;
|
||||
sess->config.shader_blocks = perf_info.shader_blocks;
|
||||
|
||||
void *buf_map;
|
||||
ret = create_and_map_bo(dev->fd, sess->sizes.ringbuf, &sess->bos.ringbuf, &buf_map);
|
||||
if (ret)
|
||||
goto free_eventfd;
|
||||
|
||||
sess->ringbuffer = buf_map;
|
||||
sess->base.data = buf_map;
|
||||
sess->base.data_ts_supported = true;
|
||||
|
||||
void *control_map;
|
||||
ret = create_and_map_bo(dev->fd, sess->sizes.control, &sess->bos.control, &control_map);
|
||||
if (ret)
|
||||
goto free_ringbuf;
|
||||
|
||||
sess->ctrl = (struct drm_panthor_perf_ringbuf_control *)control_map;
|
||||
|
||||
sess->set = 0; /* TODO should we make it configurable? */
|
||||
sess->active = false;
|
||||
sess->session_initialized = false;
|
||||
|
||||
return &(sess->base);
|
||||
|
||||
free_ringbuf:
|
||||
unmap_and_teardown_bo(dev->fd, sess->bos.ringbuf, buf_map, sess->sizes.ringbuf);
|
||||
free_eventfd:
|
||||
close(sess->fds.event);
|
||||
free_perf:
|
||||
ralloc_free(sess);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int
|
||||
panthor_kmod_perf_enable(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
UNUSED struct panthor_kmod_perf_session *psess =
|
||||
container_of(session, struct panthor_kmod_perf_session, base);
|
||||
|
||||
/* The session cannot be created outside of the sampling thread. */
|
||||
if (!psess->session_initialized) {
|
||||
int session_handle = perf_cmd_setup(psess->base.dev->fd, psess->fds.event, psess->bos.ringbuf,
|
||||
psess->bos.control, psess->set);
|
||||
|
||||
if (session_handle < 0)
|
||||
return -EINVAL;
|
||||
|
||||
psess->session_handle = session_handle;
|
||||
psess->session_initialized = true;
|
||||
}
|
||||
|
||||
int ret = perf_cmd_start(psess->base.dev->fd, psess->session_handle, psess->sample_idx++);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
psess->active = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
panthor_kmod_perf_disable(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
UNUSED struct panthor_kmod_perf_session *sess =
|
||||
container_of(session, struct panthor_kmod_perf_session, base);
|
||||
|
||||
int ret = perf_cmd_stop(sess->base.dev->fd, sess->session_handle, sess->sample_idx++);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
sess->active = false;
|
||||
|
||||
ret = poll_for_sample(sess->fds.event);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
panthor_perf_sample(struct panthor_kmod_perf_session *perf)
|
||||
{
|
||||
const uint64_t insert_idx = read_insert_idx(perf);
|
||||
const uint64_t extract_idx = read_extract_idx(perf);
|
||||
|
||||
// If there's an outstanding sample, discard it
|
||||
if (insert_idx != extract_idx)
|
||||
write_extract_idx(perf, insert_idx);
|
||||
|
||||
// Otherwise, request a new sample which will increment the insert idx
|
||||
int ret = perf_cmd_sample(perf->base.dev->fd, perf->session_handle, perf->sample_idx++);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = poll_for_sample(perf->fds.event);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Return the address of element `idx` in a byte buffer whose elements are
 * `stride` bytes apart.
 */
static uint8_t *get_base_addr(uint8_t *buf, size_t idx, size_t stride)
{
   const size_t byte_offset = idx * stride;

   return &buf[byte_offset];
}
|
||||
|
||||
static inline struct panthor_perf_sample *perf_sample_idx(struct panthor_kmod_perf_session *perf, uint64_t idx)
|
||||
{
|
||||
return (struct panthor_perf_sample *)get_base_addr(perf->ringbuffer, idx, perf->sizes.sample);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
panthor_perf_get_sample_timestamp(struct panthor_kmod_perf_session *perf)
|
||||
{
|
||||
const uint64_t extract_idx = read_extract_idx(perf);
|
||||
const struct panthor_perf_sample *sample = perf_sample_idx(perf, extract_idx);
|
||||
|
||||
return sample->sample_header.timestamp_end_ns;
|
||||
}
|
||||
|
||||
static int
|
||||
panthor_kmod_perf_dump(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
UNUSED struct panthor_kmod_perf_session *psess =
|
||||
container_of(session, struct panthor_kmod_perf_session, base);
|
||||
|
||||
int ret = panthor_perf_sample(psess);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Update data pointer to the correct spot in the ringbuffer. */
|
||||
session->data = perf_sample_idx(psess, read_extract_idx(psess));
|
||||
session->data_ts = panthor_perf_get_sample_timestamp(psess);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
panthor_kmod_perf_query_layout(const struct pan_kmod_perf_session *session,
|
||||
struct pan_kmod_perf_buffer_layout *layout)
|
||||
{
|
||||
UNUSED struct panthor_kmod_perf_session *psess =
|
||||
container_of(session, struct panthor_kmod_perf_session, base);
|
||||
|
||||
/* On all Valhall architectures this is 128. */
|
||||
const unsigned counters_per_cat = 128;
|
||||
layout->counters_per_category = counters_per_cat;
|
||||
|
||||
layout->block_stride = psess->sizes.block;
|
||||
layout->counter_stride = sizeof(uint64_t);
|
||||
|
||||
/* Setup the layout */
|
||||
layout->category[PAN_KMOD_PERF_CAT_FRONTEND].n_blocks = psess->config.cshw_blocks;
|
||||
layout->category[PAN_KMOD_PERF_CAT_TILER].n_blocks = psess->config.tiler_blocks;
|
||||
layout->category[PAN_KMOD_PERF_CAT_MEMSYS].n_blocks = psess->config.memsys_blocks;
|
||||
layout->category[PAN_KMOD_PERF_CAT_SHADER].n_blocks = psess->config.shader_blocks;
|
||||
|
||||
layout->category[0].offset =
|
||||
psess->sizes.sample_header + psess->sizes.block_header;
|
||||
for (unsigned cat_idx = 1; cat_idx < PAN_KMOD_PERF_CAT_COUNT; ++cat_idx) {
|
||||
layout->category[cat_idx].offset =
|
||||
layout->category[cat_idx - 1].offset +
|
||||
layout->category[cat_idx - 1].n_blocks * layout->block_stride;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
panthor_perf_stop(struct panthor_kmod_perf_session *perf)
|
||||
{
|
||||
int ret = perf_cmd_stop(perf->base.dev->fd, perf->session_handle, perf->sample_idx++);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
perf->active = false;
|
||||
|
||||
ret = poll_for_sample(perf->fds.event);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
panthor_kmod_perf_destroy(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
UNUSED struct panthor_kmod_perf_session *psess =
|
||||
container_of(session, struct panthor_kmod_perf_session, base);
|
||||
|
||||
int ret;
|
||||
|
||||
if (psess->active) {
|
||||
ret = panthor_perf_stop(psess);
|
||||
assert(ret == 0);
|
||||
}
|
||||
|
||||
ret = perf_cmd_teardown(psess->base.dev->fd, psess->session_handle);
|
||||
assert(ret == 0);
|
||||
|
||||
ret = unmap_and_teardown_bo(psess->base.dev->fd, psess->bos.ringbuf, psess->ringbuffer, psess->sizes.ringbuf);
|
||||
assert(ret == 0);
|
||||
|
||||
ret = unmap_and_teardown_bo(psess->base.dev->fd, psess->bos.control, psess->ctrl, psess->sizes.control);
|
||||
assert(ret == 0);
|
||||
|
||||
close(psess->fds.event);
|
||||
ralloc_free(psess);
|
||||
|
||||
pan_kmod_dev_free(session->dev, session);
|
||||
|
||||
mesa_logd("perf session destroyed");
|
||||
}
|
||||
|
||||
const struct pan_kmod_ops panthor_kmod_ops = {
|
||||
.dev_create = panthor_kmod_dev_create,
|
||||
.dev_destroy = panthor_kmod_dev_destroy,
|
||||
|
|
@ -1319,4 +1827,10 @@ const struct pan_kmod_ops panthor_kmod_ops = {
|
|||
.vm_query_state = panthor_kmod_vm_query_state,
|
||||
.query_timestamp = panthor_kmod_query_timestamp,
|
||||
.bo_set_label = panthor_kmod_bo_label,
|
||||
.perf_create = panthor_kmod_perf_init,
|
||||
.perf_enable = panthor_kmod_perf_enable,
|
||||
.perf_disable = panthor_kmod_perf_disable,
|
||||
.perf_dump = panthor_kmod_perf_dump,
|
||||
.perf_query_layout = panthor_kmod_perf_query_layout,
|
||||
.perf_destroy = panthor_kmod_perf_destroy,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -17,6 +17,13 @@ pan_query_l2_slices(const struct pan_kmod_dev_props *props)
|
|||
return ((props->mem_features >> 8) & 0xF) + 1;
|
||||
}
|
||||
|
||||
unsigned
|
||||
pan_query_bus_width(const struct pan_kmod_dev_props *props)
|
||||
{
|
||||
/* BUS_WIDTH is L2_FEATURES[31:24] log2 */
|
||||
return 1 << ((props->l2_features >> 24) & 0xF);
|
||||
}
|
||||
|
||||
struct pan_tiler_features
|
||||
pan_query_tiler_features(const struct pan_kmod_dev_props *props)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -21,6 +21,8 @@ struct pan_kmod_vm;
|
|||
|
||||
unsigned pan_query_l2_slices(const struct pan_kmod_dev_props *props);
|
||||
|
||||
unsigned pan_query_bus_width(const struct pan_kmod_dev_props *props);
|
||||
|
||||
struct pan_tiler_features
|
||||
pan_query_tiler_features(const struct pan_kmod_dev_props *props);
|
||||
|
||||
|
|
|
|||
|
|
@ -66,34 +66,34 @@ const struct pan_model pan_model_list[] = {
|
|||
MIDGARD_MODEL(0x860, "T860", "T86x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192, 8192)),
|
||||
MIDGARD_MODEL(0x880, "T880", "T88x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192, 8192)),
|
||||
|
||||
BIFROST_MODEL(PAN_PROD_ID(6, 0, 0), "G71", "TMIx", MODEL_ANISO(NONE), MODEL_TB_SIZES( 4096, 4096)),
|
||||
BIFROST_MODEL(PAN_PROD_ID(6, 2, 1), "G72", "THEx", MODEL_ANISO(R0P3), MODEL_TB_SIZES( 8192, 4096)),
|
||||
BIFROST_MODEL(PAN_PROD_ID(7, 0, 0), "G51", "TSIx", MODEL_ANISO(R1P1), MODEL_TB_SIZES( 8192, 8192)),
|
||||
BIFROST_MODEL(PAN_PROD_ID(7, 0, 3), "G31", "TDVx", MODEL_ANISO(ALL), MODEL_TB_SIZES( 8192, 8192)),
|
||||
BIFROST_MODEL(PAN_PROD_ID(7, 2, 1), "G76", "TNOx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)),
|
||||
BIFROST_MODEL(PAN_PROD_ID(7, 2, 2), "G52", "TGOx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)),
|
||||
BIFROST_MODEL(PAN_PROD_ID(7, 4, 2), "G52 r1", "TGOx", MODEL_ANISO(ALL), MODEL_TB_SIZES( 8192, 8192)),
|
||||
BIFROST_MODEL(PAN_PROD_ID(6, 0, 0), "G71", "G71", MODEL_ANISO(NONE), MODEL_TB_SIZES( 4096, 4096)),
|
||||
BIFROST_MODEL(PAN_PROD_ID(6, 2, 1), "G72", "G72", MODEL_ANISO(R0P3), MODEL_TB_SIZES( 8192, 4096)),
|
||||
BIFROST_MODEL(PAN_PROD_ID(7, 0, 0), "G51", "G51", MODEL_ANISO(R1P1), MODEL_TB_SIZES( 8192, 8192)),
|
||||
BIFROST_MODEL(PAN_PROD_ID(7, 0, 3), "G31", "G31", MODEL_ANISO(ALL), MODEL_TB_SIZES( 8192, 8192)),
|
||||
BIFROST_MODEL(PAN_PROD_ID(7, 2, 1), "G76", "G76", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)),
|
||||
BIFROST_MODEL(PAN_PROD_ID(7, 2, 2), "G52", "G52", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)),
|
||||
BIFROST_MODEL(PAN_PROD_ID(7, 4, 2), "G52 r1", "G52", MODEL_ANISO(ALL), MODEL_TB_SIZES( 8192, 8192)),
|
||||
|
||||
VALHALL_MODEL(PAN_PROD_ID(9, 0, 1), 0, "G57", "TNAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192),
|
||||
VALHALL_MODEL(PAN_PROD_ID(9, 0, 1), 0, "G57", "G77", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192),
|
||||
MODEL_RATES(2, 4, 32)),
|
||||
VALHALL_MODEL(PAN_PROD_ID(9, 0, 3), 0, "G57", "TNAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192),
|
||||
VALHALL_MODEL(PAN_PROD_ID(9, 0, 3), 0, "G57", "G77", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192),
|
||||
MODEL_RATES(2, 4, 32)),
|
||||
VALHALL_MODEL(PAN_PROD_ID(10, 8, 7), 0, "G610", "TVIx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384),
|
||||
VALHALL_MODEL(PAN_PROD_ID(10, 8, 7), 0, "G610", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384),
|
||||
MODEL_RATES(4, 8, 64)),
|
||||
VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 0, "G310v1", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192),
|
||||
VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 0, "G310v1", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192),
|
||||
MODEL_RATES(2, 2, 16)),
|
||||
VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 1, "G310v2", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192),
|
||||
VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 1, "G310v2", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192),
|
||||
MODEL_RATES(2, 4, 32)),
|
||||
VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 2, "G310v3", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192),
|
||||
VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 2, "G310v3", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192),
|
||||
MODEL_RATES(4, 4, 48)),
|
||||
VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 3, "G310v4", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384),
|
||||
VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 3, "G310v4", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384),
|
||||
MODEL_RATES(4, 8, 48)),
|
||||
VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 4, "G310v5", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384),
|
||||
VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 4, "G310v5", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384),
|
||||
MODEL_RATES(4, 8, 64)),
|
||||
|
||||
FIFTHGEN_MODEL(PAN_PROD_ID(12, 8, 0), 4, "G720", "TTIx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 32768),
|
||||
FIFTHGEN_MODEL(PAN_PROD_ID(12, 8, 0), 4, "G720", "G720", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 32768),
|
||||
MODEL_RATES(4, 8, 128)),
|
||||
FIFTHGEN_MODEL(PAN_PROD_ID(13, 8, 0), 4, "G725", "TKRx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536),
|
||||
FIFTHGEN_MODEL(PAN_PROD_ID(13, 8, 0), 4, "G725", "G725", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536),
|
||||
MODEL_RATES(4, 8, 128)),
|
||||
};
|
||||
/* clang-format on */
|
||||
|
|
|
|||
|
|
@ -1,158 +0,0 @@
|
|||
<!--
|
||||
Copyright © 2017-2020 ARM Limited.
|
||||
Copyright © 2021-2022 Collabora, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
<metrics id="TDVx">
|
||||
<category name="Job Manager" per_cpu="no">
|
||||
<event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles"/>
|
||||
<event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles"/>
|
||||
<event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs"/>
|
||||
<event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks"/>
|
||||
<event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles"/>
|
||||
<event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor reads cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles"/>
|
||||
<event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs"/>
|
||||
<event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks"/>
|
||||
<event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs"/>
|
||||
<event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks"/>
|
||||
<event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles"/>
|
||||
<event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles"/>
|
||||
</category>
|
||||
<category name="Tiler" per_cpu="no">
|
||||
<event offset="4" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles"/>
|
||||
<event offset="6" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives"/>
|
||||
<event offset="7" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives"/>
|
||||
<event offset="8" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives"/>
|
||||
<event offset="9" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="10" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="11" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives"/>
|
||||
<event offset="12" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives"/>
|
||||
<event offset="13" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives"/>
|
||||
<event offset="14" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives"/>
|
||||
<event offset="17" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats"/>
|
||||
<event offset="19" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats"/>
|
||||
<event offset="21" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles"/>
|
||||
<event offset="26" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="27" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="31" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles"/>
|
||||
<event offset="34" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="35" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="37" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="38" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles"/>
|
||||
<event offset="54" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles"/>
|
||||
<event offset="55" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles"/>
|
||||
</category>
|
||||
<category name="Memory System" per_cpu="no">
|
||||
<event offset="4" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests"/>
|
||||
<event offset="16" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests"/>
|
||||
<event offset="17" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles L2 cache read requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="18" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests"/>
|
||||
<event offset="19" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests"/>
|
||||
<event offset="21" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="25" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests"/>
|
||||
<event offset="26" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests"/>
|
||||
<event offset="27" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests"/>
|
||||
<event offset="28" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests"/>
|
||||
<event offset="29" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions"/>
|
||||
<event offset="30" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions"/>
|
||||
<event offset="31" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions"/>
|
||||
<event offset="32" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats"/>
|
||||
<event offset="33" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="34" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="35" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="36" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="37" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats"/>
|
||||
<event offset="38" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats"/>
|
||||
<event offset="39" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats"/>
|
||||
<event offset="40" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats"/>
|
||||
<event offset="41" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats"/>
|
||||
<event offset="42" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions"/>
|
||||
<event offset="43" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions"/>
|
||||
<event offset="44" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions"/>
|
||||
<event offset="45" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions"/>
|
||||
<event offset="46" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions"/>
|
||||
<event offset="47" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats"/>
|
||||
<event offset="48" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="49" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="50" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="51" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="52" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions"/>
|
||||
<event offset="53" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by external master is stalled." units="cycles"/>
|
||||
</category>
|
||||
<category name="Shader Core" per_cpu="no">
|
||||
<event offset="4" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles"/>
|
||||
<event offset="5" advanced="yes" counter="FRAG_PRIMITIVES" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives"/>
|
||||
<event offset="6" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives"/>
|
||||
<event offset="7" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles"/>
|
||||
<event offset="9" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps"/>
|
||||
<event offset="10" counter="FRAG_PARTIAL_WARPS" title="Core Warps" name="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps"/>
|
||||
<event offset="11" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads"/>
|
||||
<event offset="12" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads"/>
|
||||
<event offset="13" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads"/>
|
||||
<event offset="14" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads"/>
|
||||
<event offset="15" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads"/>
|
||||
<event offset="16" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads"/>
|
||||
<event offset="18" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles"/>
|
||||
<event offset="19" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles"/>
|
||||
<event offset="20" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads"/>
|
||||
<event offset="22" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks"/>
|
||||
<event offset="24" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps"/>
|
||||
<event offset="25" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles"/>
|
||||
<event offset="26" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles"/>
|
||||
<event offset="27" advanced="yes" counter="EXEC_ACTIVE" title="Core Cycles" name="Execution engine active" description="The number of cycles where the execution engine unit is processing at least one thread." units="cycles"/>
|
||||
<event offset="28" counter="EXEC_INSTR_COUNT" title="Core EE Instructions" name="Executed instructions" description="The number of instructions executed per warp." units="instructions"/>
|
||||
<event offset="29" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions"/>
|
||||
<event offset="30" advanced="yes" counter="EXEC_INSTR_STARVING" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where no new threads are available for execution." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="ARITH_INSTR_SINGLE_FMA" title="Core EE Instructions" name="Arithmetic instructions" description="The number of instructions where the workload is a single FMA pipe arithmetic operation." units="instructions"/>
|
||||
<event offset="32" advanced="yes" counter="ARITH_INSTR_DOUBLE" title="Core EE Instructions" name="Dual Arithmetic instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe arithmetic operation." units="instructions"/>
|
||||
<event offset="33" advanced="yes" counter="ARITH_INSTR_MSG" title="Core EE Instructions" name="Arithmetic + Message instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe message operation" units="instructions"/>
|
||||
<event offset="34" advanced="yes" counter="ARITH_INSTR_MSG_ONLY" title="Core EE Instructions" name="Message instructions" description="The number of instructions where the workload is a single ADD pipe message operation, with no FMA pipe operation." units="instructions"/>
|
||||
<event offset="35" counter="TEX_MSGI_NUM_QUADS" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads"/>
|
||||
<event offset="36" counter="TEX_DFCH_NUM_PASSES" title="Core Texture Quads" name="Texture issues" description="The number of quad-width filtering passes." units="issues"/>
|
||||
<event offset="37" counter="TEX_DFCH_NUM_PASSES_MISS" title="Core Texture Quads" name="Descriptor misses" description="The number of quad-width filtering passes that miss in the resource or sampler descriptor cache." units="requests"/>
|
||||
<event offset="38" counter="TEX_DFCH_NUM_PASSES_MIP_MAP" title="Core Texture Quads" name="Mipmapped texture issues" description="The number of quad-width filtering passes that use a mipmapped texture." units="issues"/>
|
||||
<event offset="39" counter="TEX_TIDX_NUM_SPLIT_MIP_MAP" title="Core Texture Quads" name="Trilinear filtered issues" description="The number of quad-width filtering passes that use a trilinear filter." units="issues"/>
|
||||
<event offset="40" counter="TEX_TFCH_NUM_LINES_FETCHED" title="Core Texture Line Fetches" name="Line fetches" description="The number of texture line fetches from the L2 cache." units="issues"/>
|
||||
<event offset="41" counter="TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED" title="Core Texture Line Fetches" name="Compressed line fetches" description="The number of texture line fetches from the L2 cache that are block compressed textures." units="issues"/>
|
||||
<event offset="42" counter="TEX_TFCH_NUM_OPERATIONS" title="Core Texture Cycles" name="Cache lookups" description="The number of texture cache lookup cycles." units="requests"/>
|
||||
<event offset="43" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles"/>
|
||||
<event offset="44" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles"/>
|
||||
<event offset="45" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles"/>
|
||||
<event offset="46" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles"/>
|
||||
<event offset="47" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles"/>
|
||||
<event offset="48" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles"/>
|
||||
<event offset="49" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions"/>
|
||||
<event offset="50" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="51" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="52" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions"/>
|
||||
<event offset="53" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions"/>
|
||||
<event offset="54" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats"/>
|
||||
<event offset="55" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="56" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats"/>
|
||||
<event offset="57" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="58" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats"/>
|
||||
<event offset="59" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="60" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats"/>
|
||||
<event offset="63" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats"/>
|
||||
<event offset="62" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats"/>
|
||||
<event offset="61" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats"/>
|
||||
</category>
|
||||
</metrics>
|
||||
|
|
@ -1,158 +0,0 @@
|
|||
<!--
|
||||
Copyright © 2017-2020 ARM Limited.
|
||||
Copyright © 2021-2022 Collabora, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
<metrics id="TSIx">
|
||||
<category name="Job Manager" per_cpu="no">
|
||||
<event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles"/>
|
||||
<event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles"/>
|
||||
<event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs"/>
|
||||
<event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks"/>
|
||||
<event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles"/>
|
||||
<event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor reads cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles"/>
|
||||
<event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs"/>
|
||||
<event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks"/>
|
||||
<event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs"/>
|
||||
<event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks"/>
|
||||
<event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles"/>
|
||||
<event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles"/>
|
||||
</category>
|
||||
<category name="Tiler" per_cpu="no">
|
||||
<event offset="4" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles"/>
|
||||
<event offset="6" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives"/>
|
||||
<event offset="7" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives"/>
|
||||
<event offset="8" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives"/>
|
||||
<event offset="9" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="10" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="11" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives"/>
|
||||
<event offset="12" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives"/>
|
||||
<event offset="13" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives"/>
|
||||
<event offset="14" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives"/>
|
||||
<event offset="17" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats"/>
|
||||
<event offset="19" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats"/>
|
||||
<event offset="21" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles"/>
|
||||
<event offset="26" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="27" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="31" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles"/>
|
||||
<event offset="34" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="35" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="37" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="38" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles"/>
|
||||
<event offset="54" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer cannot send data because it has no available write IDs." units="cycles"/>
|
||||
<event offset="55" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer cannot send data because the bus is not ready." units="cycles"/>
|
||||
</category>
|
||||
<category name="Memory System" per_cpu="no">
|
||||
<event offset="4" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests"/>
|
||||
<event offset="16" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests"/>
|
||||
<event offset="17" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles where L2 cache read requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="18" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests"/>
|
||||
<event offset="19" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests"/>
|
||||
<event offset="21" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="25" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests"/>
|
||||
<event offset="26" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests"/>
|
||||
<event offset="27" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests"/>
|
||||
<event offset="28" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests"/>
|
||||
<event offset="29" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions"/>
|
||||
<event offset="30" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions"/>
|
||||
<event offset="31" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions"/>
|
||||
<event offset="32" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats"/>
|
||||
<event offset="33" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="34" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="35" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="36" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="37" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats"/>
|
||||
<event offset="38" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats"/>
|
||||
<event offset="39" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats"/>
|
||||
<event offset="40" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats"/>
|
||||
<event offset="41" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats"/>
|
||||
<event offset="42" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions"/>
|
||||
<event offset="43" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions"/>
|
||||
<event offset="44" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions"/>
|
||||
<event offset="45" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions"/>
|
||||
<event offset="46" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions"/>
|
||||
<event offset="47" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats"/>
|
||||
<event offset="48" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="49" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="50" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="51" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="52" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions"/>
|
||||
<event offset="53" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles"/>
|
||||
</category>
|
||||
<category name="Shader Core" per_cpu="no">
|
||||
<event offset="4" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles"/>
|
||||
<event offset="5" advanced="yes" counter="FRAG_PRIMITIVES" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives"/>
|
||||
<event offset="6" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives"/>
|
||||
<event offset="7" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles"/>
|
||||
<event offset="9" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps"/>
|
||||
<event offset="10" counter="FRAG_PARTIAL_WARPS" title="Core Warps" name="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps"/>
|
||||
<event offset="11" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads"/>
|
||||
<event offset="12" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads"/>
|
||||
<event offset="13" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads"/>
|
||||
<event offset="14" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads"/>
|
||||
<event offset="15" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads"/>
|
||||
<event offset="16" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads"/>
|
||||
<event offset="18" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles"/>
|
||||
<event offset="19" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles"/>
|
||||
<event offset="20" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads"/>
|
||||
<event offset="22" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks"/>
|
||||
<event offset="24" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps"/>
|
||||
<event offset="25" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles"/>
|
||||
<event offset="26" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles"/>
|
||||
<event offset="27" advanced="yes" counter="EXEC_ACTIVE" title="Core Cycles" name="Execution engine active" description="The number of cycles where the execution engine unit is processing at least one thread." units="cycles"/>
|
||||
<event offset="28" counter="EXEC_INSTR_COUNT" title="Core EE Instructions" name="Executed instructions" description="The number of instructions executed per warp." units="instructions"/>
|
||||
<event offset="29" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions"/>
|
||||
<event offset="30" advanced="yes" counter="EXEC_INSTR_STARVING" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where no new threads are available for execution." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="ARITH_INSTR_SINGLE_FMA" title="Core EE Instructions" name="Arithmetic instructions" description="The number of instructions where the workload is a single FMA pipe arithmetic operation." units="instructions"/>
|
||||
<event offset="32" advanced="yes" counter="ARITH_INSTR_DOUBLE" title="Core EE Instructions" name="Dual Arithmetic instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe arithmetic operation." units="instructions"/>
|
||||
<event offset="33" advanced="yes" counter="ARITH_INSTR_MSG" title="Core EE Instructions" name="Arithmetic + Message instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe message operation." units="instructions"/>
|
||||
<event offset="34" advanced="yes" counter="ARITH_INSTR_MSG_ONLY" title="Core EE Instructions" name="Message instructions" description="The number of instructions where the workload is a single ADD pipe message operation, with no FMA pipe operation." units="instructions"/>
|
||||
<event offset="35" counter="TEX_MSGI_NUM_QUADS" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads"/>
|
||||
<event offset="36" counter="TEX_DFCH_NUM_PASSES" title="Core Texture Quads" name="Texture issues" description="The number of quad-width filtering passes." units="issues"/>
|
||||
<event offset="37" counter="TEX_DFCH_NUM_PASSES_MISS" title="Core Texture Quads" name="Descriptor misses" description="The number of quad-width filtering passes that miss in the resource or sampler descriptor cache." units="requests"/>
|
||||
<event offset="38" counter="TEX_DFCH_NUM_PASSES_MIP_MAP" title="Core Texture Quads" name="Mipmapped texture issues" description="The number of quad-width filtering passes that use a mipmapped texture." units="issues"/>
|
||||
<event offset="39" counter="TEX_TIDX_NUM_SPLIT_MIP_MAP" title="Core Texture Quads" name="Trilinear filtered issues" description="The number of quad-width filtering passes that use a trilinear filter." units="issues"/>
|
||||
<event offset="40" counter="TEX_TFCH_NUM_LINES_FETCHED" title="Core Texture Line Fetches" name="Line fetches" description="The number of texture line fetches from the L2 cache." units="issues"/>
|
||||
<event offset="41" counter="TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED" title="Core Texture Line Fetches" name="Compressed line fetches" description="The number of texture line fetches from the L2 cache that are block compressed textures." units="issues"/>
|
||||
<event offset="42" counter="TEX_TFCH_NUM_OPERATIONS" title="Core Texture Cycles" name="Cache lookups" description="The number of texture cache lookup cycles." units="requests"/>
|
||||
<event offset="43" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles"/>
|
||||
<event offset="44" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles"/>
|
||||
<event offset="45" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles"/>
|
||||
<event offset="46" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles"/>
|
||||
<event offset="47" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles"/>
|
||||
<event offset="48" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles"/>
|
||||
<event offset="49" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions"/>
|
||||
<event offset="50" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="51" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="52" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions"/>
|
||||
<event offset="53" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions"/>
|
||||
<event offset="54" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats"/>
|
||||
<event offset="55" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="56" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats"/>
|
||||
<event offset="57" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="58" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats"/>
|
||||
<event offset="59" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="60" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats"/>
|
||||
<event offset="61" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats"/>
|
||||
<event offset="62" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats"/>
|
||||
<event offset="63" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats"/>
|
||||
</category>
|
||||
</metrics>
|
||||
|
|
@ -1,160 +0,0 @@
|
|||
<!--
|
||||
Copyright © 2017-2020 ARM Limited.
|
||||
Copyright © 2021-2022 Collabora, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
<metrics id="TGOx">
|
||||
<category name="Job Manager" per_cpu="no">
|
||||
<event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles"/>
|
||||
<event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles"/>
|
||||
<event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs"/>
|
||||
<event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks"/>
|
||||
<event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles"/>
|
||||
<event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor read cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles"/>
|
||||
<event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs"/>
|
||||
<event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks"/>
|
||||
<event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs"/>
|
||||
<event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks"/>
|
||||
<event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles"/>
|
||||
<event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles"/>
|
||||
</category>
|
||||
<category name="Tiler" per_cpu="no">
|
||||
<event offset="4" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles"/>
|
||||
<event offset="6" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives"/>
|
||||
<event offset="7" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives"/>
|
||||
<event offset="8" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives"/>
|
||||
<event offset="9" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="10" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="11" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives"/>
|
||||
<event offset="12" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives"/>
|
||||
<event offset="13" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives"/>
|
||||
<event offset="14" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives"/>
|
||||
<event offset="17" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats"/>
|
||||
<event offset="19" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats"/>
|
||||
<event offset="21" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles"/>
|
||||
<event offset="26" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="27" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="31" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles"/>
|
||||
<event offset="34" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="35" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="37" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="38" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles"/>
|
||||
<event offset="54" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles"/>
|
||||
<event offset="55" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles"/>
|
||||
</category>
|
||||
<category name="Memory System" per_cpu="no">
|
||||
<event offset="4" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests"/>
|
||||
<event offset="16" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests"/>
|
||||
<event offset="17" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles where L2 cache read requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="18" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests"/>
|
||||
<event offset="19" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests"/>
|
||||
<event offset="21" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="25" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests"/>
|
||||
<event offset="26" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests"/>
|
||||
<event offset="27" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests"/>
|
||||
<event offset="28" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests"/>
|
||||
<event offset="29" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions"/>
|
||||
<event offset="30" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions"/>
|
||||
<event offset="31" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions"/>
|
||||
<event offset="32" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats"/>
|
||||
<event offset="33" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="34" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="35" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="36" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="37" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats"/>
|
||||
<event offset="38" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats"/>
|
||||
<event offset="39" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats"/>
|
||||
<event offset="40" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats"/>
|
||||
<event offset="41" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats"/>
|
||||
<event offset="42" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions"/>
|
||||
<event offset="43" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions"/>
|
||||
<event offset="44" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions"/>
|
||||
<event offset="45" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions"/>
|
||||
<event offset="46" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions"/>
|
||||
<event offset="47" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats"/>
|
||||
<event offset="48" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="49" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="50" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="51" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="52" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions"/>
|
||||
<event offset="53" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles"/>
|
||||
</category>
|
||||
<category name="Shader Core" per_cpu="no">
|
||||
<event offset="4" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles"/>
|
||||
<event offset="5" advanced="yes" counter="FRAG_PRIMITIVES" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives"/>
|
||||
<event offset="6" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives"/>
|
||||
<event offset="7" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles"/>
|
||||
<event offset="9" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps"/>
|
||||
<event offset="10" counter="FRAG_PARTIAL_WARPS" title="Core Warps" name="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps"/>
|
||||
<event offset="11" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads"/>
|
||||
<event offset="12" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads"/>
|
||||
<event offset="13" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads"/>
|
||||
<event offset="14" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads"/>
|
||||
<event offset="15" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads"/>
|
||||
<event offset="16" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads"/>
|
||||
<event offset="17" counter="WARP_REG_SIZE_64" title="Core Warps" name="All register warps" description="The number of warps that require more than 32 registers." units="warps"/>
|
||||
<event offset="18" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles"/>
|
||||
<event offset="19" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles"/>
|
||||
<event offset="20" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads"/>
|
||||
<event offset="21" counter="FULL_QUAD_WARPS" title="Core Warps" name="Full quad warps" description="The number of warps that are fully populated with quads." units="warps"/>
|
||||
<event offset="22" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks"/>
|
||||
<event offset="24" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps"/>
|
||||
<event offset="25" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles"/>
|
||||
<event offset="26" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles"/>
|
||||
<event offset="27" advanced="yes" counter="EXEC_ACTIVE" title="Core Cycles" name="Execution engine active" description="The number of cycles where the execution engine unit is processing at least one thread." units="cycles"/>
|
||||
<event offset="28" counter="EXEC_INSTR_COUNT" title="Core EE Instructions" name="Executed instructions" description="The number of instructions executed per warp." units="instructions"/>
|
||||
<event offset="29" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions"/>
|
||||
<event offset="30" advanced="yes" counter="EXEC_INSTR_STARVING" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where no new threads are available for execution." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="ARITH_INSTR_SINGLE_FMA" title="Core EE Instructions" name="Arithmetic instructions" description="The number of instructions where the workload is a single FMA pipe arithmetic operation." units="instructions"/>
|
||||
<event offset="32" advanced="yes" counter="ARITH_INSTR_DOUBLE" title="Core EE Instructions" name="Dual Arithmetic instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe arithmetic operation." units="instructions"/>
|
||||
<event offset="33" advanced="yes" counter="ARITH_INSTR_MSG" title="Core EE Instructions" name="Arithmetic + Message instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe message operation." units="instructions"/>
|
||||
<event offset="34" advanced="yes" counter="ARITH_INSTR_MSG_ONLY" title="Core EE Instructions" name="Message instructions" description="The number of instructions where the workload is a single ADD pipe message operation, with no FMA pipe operation." units="instructions"/>
|
||||
<event offset="35" counter="TEX_MSGI_NUM_QUADS" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads"/>
|
||||
<event offset="36" counter="TEX_DFCH_NUM_PASSES" title="Core Texture Quads" name="Texture issues" description="The number of quad-width filtering passes." units="issues"/>
|
||||
<event offset="37" counter="TEX_DFCH_NUM_PASSES_MISS" title="Core Texture Quads" name="Descriptor misses" description="The number of quad-width filtering passes that miss in the resource or sampler descriptor cache." units="requests"/>
|
||||
<event offset="38" counter="TEX_DFCH_NUM_PASSES_MIP_MAP" title="Core Texture Quads" name="Mipmapped texture issues" description="The number of quad-width filtering passes that use a mipmapped texture." units="issues"/>
|
||||
<event offset="39" counter="TEX_TIDX_NUM_SPLIT_MIP_MAP" title="Core Texture Quads" name="Trilinear filtered issues" description="The number of quad-width filtering passes that use a trilinear filter." units="issues"/>
|
||||
<event offset="40" counter="TEX_TFCH_NUM_LINES_FETCHED" title="Core Texture Line Fetches" name="Line fetches" description="The number of texture line fetches from the L2 cache." units="issues"/>
|
||||
<event offset="41" counter="TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED" title="Core Texture Line Fetches" name="Compressed line fetches" description="The number of texture line fetches from the L2 cache that are block compressed textures." units="issues"/>
|
||||
<event offset="42" counter="TEX_TFCH_NUM_OPERATIONS" title="Core Texture Cycles" name="Cache lookups" description="The number of texture cache lookup cycles." units="requests"/>
|
||||
<event offset="43" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles"/>
|
||||
<event offset="44" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles"/>
|
||||
<event offset="45" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles"/>
|
||||
<event offset="46" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles"/>
|
||||
<event offset="47" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles"/>
|
||||
<event offset="48" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles"/>
|
||||
<event offset="49" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions"/>
|
||||
<event offset="50" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="51" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="52" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions"/>
|
||||
<event offset="53" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions"/>
|
||||
<event offset="54" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats"/>
|
||||
<event offset="55" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="56" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats"/>
|
||||
<event offset="57" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="58" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats"/>
|
||||
<event offset="59" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="60" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats"/>
|
||||
<event offset="61" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats"/>
|
||||
<event offset="62" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats"/>
|
||||
<event offset="63" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats"/>
|
||||
</category>
|
||||
</metrics>
|
||||
|
|
@ -1,160 +0,0 @@
|
|||
<!--
|
||||
Copyright © 2017-2020 ARM Limited.
|
||||
Copyright © 2021-2022 Collabora, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
<metrics id="TNAx">
|
||||
<category name="Job Manager" per_cpu="no">
|
||||
<event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles"/>
|
||||
<event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles"/>
|
||||
<event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs"/>
|
||||
<event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks"/>
|
||||
<event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles"/>
|
||||
<event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor read cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles"/>
|
||||
<event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs"/>
|
||||
<event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks"/>
|
||||
<event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs"/>
|
||||
<event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks"/>
|
||||
<event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles"/>
|
||||
<event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles"/>
|
||||
</category>
|
||||
<category name="Tiler" per_cpu="no">
|
||||
<event offset="4" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles"/>
|
||||
<event offset="6" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives"/>
|
||||
<event offset="7" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives"/>
|
||||
<event offset="8" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives"/>
|
||||
<event offset="9" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="10" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="11" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives"/>
|
||||
<event offset="12" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives"/>
|
||||
<event offset="13" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives"/>
|
||||
<event offset="14" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives"/>
|
||||
<event offset="17" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats"/>
|
||||
<event offset="19" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats"/>
|
||||
<event offset="21" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles"/>
|
||||
<event offset="26" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="27" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="31" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles"/>
|
||||
<event offset="34" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="35" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="37" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="38" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles"/>
|
||||
<event offset="54" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles"/>
|
||||
<event offset="55" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles"/>
|
||||
</category>
|
||||
<category name="Memory System" per_cpu="no">
|
||||
<event offset="4" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests"/>
|
||||
<event offset="16" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests"/>
|
||||
<event offset="17" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles where L2 cache read requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="18" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests"/>
|
||||
<event offset="19" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests"/>
|
||||
<event offset="21" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="25" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests"/>
|
||||
<event offset="26" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests"/>
|
||||
<event offset="27" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests"/>
|
||||
<event offset="28" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests"/>
|
||||
<event offset="29" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions"/>
|
||||
<event offset="30" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions"/>
|
||||
<event offset="31" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions"/>
|
||||
<event offset="32" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats"/>
|
||||
<event offset="33" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="34" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="35" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="36" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="37" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats"/>
|
||||
<event offset="38" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats"/>
|
||||
<event offset="39" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats"/>
|
||||
<event offset="40" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats"/>
|
||||
<event offset="41" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats"/>
|
||||
<event offset="42" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions"/>
|
||||
<event offset="43" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions"/>
|
||||
<event offset="44" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions"/>
|
||||
<event offset="45" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions"/>
|
||||
<event offset="46" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions"/>
|
||||
<event offset="47" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats"/>
|
||||
<event offset="48" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="49" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="50" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="51" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="52" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions"/>
|
||||
<event offset="53" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles"/>
|
||||
</category>
|
||||
<category name="Shader Core" per_cpu="no">
|
||||
<event offset="4" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles"/>
|
||||
<event offset="5" advanced="yes" counter="FRAG_PRIMITIVES_OUT" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives"/>
|
||||
<event offset="6" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives"/>
|
||||
<event offset="7" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles"/>
|
||||
<event offset="9" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps"/>
|
||||
<event offset="10" counter="FRAG_PARTIAL_QUADS_RAST" title="Core Quads" name="Partial rasterized quads" description="The number of partially-rasterized fragment quads created." units="quads"/>
|
||||
<event offset="11" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads"/>
|
||||
<event offset="12" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads"/>
|
||||
<event offset="13" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads"/>
|
||||
<event offset="14" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads"/>
|
||||
<event offset="15" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads"/>
|
||||
<event offset="16" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads"/>
|
||||
<event offset="17" counter="WARP_REG_SIZE_64" title="Core Warps" name="All register warps" description="The number of warps that require more than 32 registers." units="warps"/>
|
||||
<event offset="18" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles"/>
|
||||
<event offset="19" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles"/>
|
||||
<event offset="20" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads"/>
|
||||
<event offset="21" counter="FULL_QUAD_WARPS" title="Core Warps" name="Full quad warps" description="The number of warps that are fully populated with quads." units="warps"/>
|
||||
<event offset="22" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks"/>
|
||||
<event offset="24" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps"/>
|
||||
<event offset="25" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles"/>
|
||||
<event offset="26" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles"/>
|
||||
<event offset="27" counter="EXEC_INSTR_FMA" title="Core PU Instructions" name="FMA instructions" description="The number of instructions issued to the FMA pipe." units="instructions"/>
|
||||
<event offset="28" counter="EXEC_INSTR_CVT" title="Core PU Instructions" name="CVT instructions" description="The number of instructions issued to the CVT pipe." units="instructions"/>
|
||||
<event offset="29" counter="EXEC_INSTR_SFU" title="Core PU Instructions" name="SFU instructions" description="The number of instructions issued to the SFU pipe." units="instructions"/>
|
||||
<event offset="30" counter="EXEC_INSTR_MSG" title="Core PU Instructions" name="Message instructions" description="The number of instructions issued to the MSG pipe." units="instructions"/>
|
||||
<event offset="31" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions"/>
|
||||
<event offset="32" advanced="yes" counter="EXEC_ICACHE_MISS" title="Core PU Instructions" name="Instruction cache misses" description="The number of instruction cache misses." units="requests"/>
|
||||
<event offset="33" advanced="yes" counter="EXEC_STARVE_ARITH" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where the processing unit is starved of work." units="cycles"/>
|
||||
<event offset="34" counter="CALL_BLEND_SHADER" title="Core PU Instructions" name="Blend shader calls" description="The number of blend shader invocations executed." units="instructions"/>
|
||||
<event offset="35" counter="TEX_MSGI_NUM_FLITS" title="Texture Bus" name="Input beats" description="The number of texture request message data beats." units="beats"/>
|
||||
<event offset="36" counter="TEX_DFCH_CLK_STALLED" title="Core Texture Stalls" name="Descriptor stall cycles" description="The number of cycles where a quad is stalled on texture descriptor fetch." units="cycles"/>
|
||||
<event offset="37" counter="TEX_TFCH_CLK_STALLED" title="Core Texture Stalls" name="Fetch queue stall cycles" description="The number of cycles where a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles"/>
|
||||
<event offset="38" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" title="Core Texture Stalls" name="Filtering unit stall cycles" description="The number of cycles where the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles"/>
|
||||
<event offset="39" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles"/>
|
||||
<event offset="40" counter="TEX_FILT_NUM_FXR_OPERATIONS" title="Core Texture Cycles" name="4x bilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement nearest or bilinear filtering." units="cycles"/>
|
||||
<event offset="41" counter="TEX_FILT_NUM_FST_OPERATIONS" title="Core Texture Cycles" name="2x trilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement trilinear filtering." units="cycles"/>
|
||||
<event offset="42" counter="TEX_MSGO_NUM_MSG" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads"/>
|
||||
<event offset="43" counter="TEX_MSGO_NUM_FLITS" title="Texture Bus" name="Output beats" description="The number of texture response message data beats." units="beats"/>
|
||||
<event offset="44" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles"/>
|
||||
<event offset="45" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles"/>
|
||||
<event offset="46" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles"/>
|
||||
<event offset="47" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles"/>
|
||||
<event offset="48" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles"/>
|
||||
<event offset="49" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions"/>
|
||||
<event offset="50" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="51" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="52" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions"/>
|
||||
<event offset="53" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions"/>
|
||||
<event offset="54" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats"/>
|
||||
<event offset="55" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="56" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats"/>
|
||||
<event offset="57" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="58" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats"/>
|
||||
<event offset="59" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="60" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats"/>
|
||||
<event offset="61" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats"/>
|
||||
<event offset="62" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats"/>
|
||||
<event offset="63" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats"/>
|
||||
</category>
|
||||
</metrics>
|
||||
|
|
@ -1,160 +0,0 @@
|
|||
<!--
|
||||
Copyright © 2017-2020 ARM Limited.
|
||||
Copyright © 2021-2022 Collabora, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
<metrics id="TOTx">
|
||||
<category name="Job Manager" per_cpu="no">
|
||||
<event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles"/>
|
||||
<event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles"/>
|
||||
<event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs"/>
|
||||
<event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks"/>
|
||||
<event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles"/>
|
||||
<event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor read cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles"/>
|
||||
<event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs"/>
|
||||
<event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks"/>
|
||||
<event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs"/>
|
||||
<event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks"/>
|
||||
<event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles"/>
|
||||
<event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles"/>
|
||||
</category>
|
||||
<category name="Tiler" per_cpu="no">
|
||||
<event offset="4" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles"/>
|
||||
<event offset="6" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives"/>
|
||||
<event offset="7" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives"/>
|
||||
<event offset="8" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives"/>
|
||||
<event offset="9" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="10" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="11" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives"/>
|
||||
<event offset="12" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives"/>
|
||||
<event offset="13" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives"/>
|
||||
<event offset="14" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives"/>
|
||||
<event offset="17" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats"/>
|
||||
<event offset="19" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats"/>
|
||||
<event offset="21" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles"/>
|
||||
<event offset="26" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="27" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="31" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles"/>
|
||||
<event offset="34" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="35" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="37" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="38" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles"/>
|
||||
<event offset="54" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles"/>
|
||||
<event offset="55" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles"/>
|
||||
</category>
|
||||
<category name="Memory System" per_cpu="no">
|
||||
<event offset="4" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests"/>
|
||||
<event offset="16" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests"/>
|
||||
<event offset="17" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles L2 cache read requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="18" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests"/>
|
||||
<event offset="19" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests"/>
|
||||
<event offset="21" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="25" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests"/>
|
||||
<event offset="26" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests"/>
|
||||
<event offset="27" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests"/>
|
||||
<event offset="28" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests"/>
|
||||
<event offset="29" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions"/>
|
||||
<event offset="30" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions"/>
|
||||
<event offset="31" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions"/>
|
||||
<event offset="32" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats"/>
|
||||
<event offset="33" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="34" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="35" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="36" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="37" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats"/>
|
||||
<event offset="38" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats"/>
|
||||
<event offset="39" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats"/>
|
||||
<event offset="40" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats"/>
|
||||
<event offset="41" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats"/>
|
||||
<event offset="42" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions"/>
|
||||
<event offset="43" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions"/>
|
||||
<event offset="44" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions"/>
|
||||
<event offset="45" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions"/>
|
||||
<event offset="46" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions"/>
|
||||
<event offset="47" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats"/>
|
||||
<event offset="48" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="49" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="50" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="51" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="52" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions"/>
|
||||
<event offset="53" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles"/>
|
||||
</category>
|
||||
<category name="Shader Core" per_cpu="no">
|
||||
<event offset="4" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles"/>
|
||||
<event offset="5" advanced="yes" counter="FRAG_PRIMITIVES_OUT" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives"/>
|
||||
<event offset="6" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives"/>
|
||||
<event offset="7" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles"/>
|
||||
<event offset="9" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps"/>
|
||||
<event offset="10" counter="FRAG_PARTIAL_QUADS_RAST" title="Core Quads" name="Partial rasterized quads" description="The number of partially-rasterized fragment quads created." units="quads"/>
|
||||
<event offset="11" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads"/>
|
||||
<event offset="12" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads"/>
|
||||
<event offset="13" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads"/>
|
||||
<event offset="14" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads"/>
|
||||
<event offset="15" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads"/>
|
||||
<event offset="16" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads"/>
|
||||
<event offset="17" counter="WARP_REG_SIZE_64" title="Core Warps" name="All register warps" description="The number of warps that require more than 32 registers." units="warps"/>
|
||||
<event offset="18" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles"/>
|
||||
<event offset="19" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles"/>
|
||||
<event offset="20" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads"/>
|
||||
<event offset="21" counter="FULL_QUAD_WARPS" title="Core Warps" name="Full quad warps" description="The number of warps that are fully populated with quads." units="warps"/>
|
||||
<event offset="22" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks"/>
|
||||
<event offset="24" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps"/>
|
||||
<event offset="25" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles"/>
|
||||
<event offset="26" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles"/>
|
||||
<event offset="27" counter="EXEC_INSTR_FMA" title="Core PU Instructions" name="FMA instructions" description="The number of instructions issued to the FMA pipe." units="instructions"/>
|
||||
<event offset="28" counter="EXEC_INSTR_CVT" title="Core PU Instructions" name="CVT instructions" description="The number of instructions issued to the CVT pipe." units="instructions"/>
|
||||
<event offset="29" counter="EXEC_INSTR_SFU" title="Core PU Instructions" name="SFU instructions" description="The number of instructions issued to the SFU pipe." units="instructions"/>
|
||||
<event offset="30" counter="EXEC_INSTR_MSG" title="Core PU Instructions" name="Message instructions" description="The number of instructions issued to the MSG pipe." units="instructions"/>
|
||||
<event offset="31" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions"/>
|
||||
<event offset="32" advanced="yes" counter="EXEC_ICACHE_MISS" title="Core PU Instructions" name="Instruction cache misses" description="The number of instruction cache misses." units="requests"/>
|
||||
<event offset="33" advanced="yes" counter="EXEC_STARVE_ARITH" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where the processing unit is starved of work." units="cycles"/>
|
||||
<event offset="34" counter="CALL_BLEND_SHADER" title="Core PU Instructions" name="Blend shader calls" description="The number of blend shader invocations executed." units="instructions"/>
|
||||
<event offset="35" counter="TEX_MSGI_NUM_FLITS" title="Texture Bus" name="Input beats" description="The number of texture request message data beats." units="beats"/>
|
||||
<event offset="36" counter="TEX_DFCH_CLK_STALLED" title="Core Texture Stalls" name="Descriptor stall cycles" description="The number of cycles where a quad is stalled on texture descriptor fetch." units="cycles"/>
|
||||
<event offset="37" counter="TEX_TFCH_CLK_STALLED" title="Core Texture Stalls" name="Fetch queue stall cycles" description="The number of cycles where a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles"/>
|
||||
<event offset="38" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" title="Core Texture Stalls" name="Filtering unit stall cycles" description="The number of cycles where the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles"/>
|
||||
<event offset="39" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles"/>
|
||||
<event offset="40" counter="TEX_FILT_NUM_FXR_OPERATIONS" title="Core Texture Cycles" name="4x bilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement nearest or bilinear filtering." units="cycles"/>
|
||||
<event offset="41" counter="TEX_FILT_NUM_FST_OPERATIONS" title="Core Texture Cycles" name="2x trilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement trilinear filtering." units="cycles"/>
|
||||
<event offset="42" counter="TEX_MSGO_NUM_MSG" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads"/>
|
||||
<event offset="43" counter="TEX_MSGO_NUM_FLITS" title="Texture Bus" name="Output beats" description="The number of texture response message data beats." units="beats"/>
|
||||
<event offset="44" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles"/>
|
||||
<event offset="45" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles"/>
|
||||
<event offset="46" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles"/>
|
||||
<event offset="47" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles"/>
|
||||
<event offset="48" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles"/>
|
||||
<event offset="49" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions"/>
|
||||
<event offset="50" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="51" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="52" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions"/>
|
||||
<event offset="53" counter="SHADER_CORE_ACTIVE" title="Core Cycles" name="Any active" description="The number of cycles where the shader core is processing either a non-fragment workload or a fragment workload." units="cycles"/>
|
||||
<event offset="54" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats"/>
|
||||
<event offset="55" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="56" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats"/>
|
||||
<event offset="57" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="58" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats"/>
|
||||
<event offset="59" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="60" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats"/>
|
||||
<event offset="61" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats"/>
|
||||
<event offset="62" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats"/>
|
||||
<event offset="63" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats"/>
|
||||
</category>
|
||||
</metrics>
|
||||
|
|
@ -1,158 +0,0 @@
|
|||
<!--
|
||||
Copyright © 2017-2020 ARM Limited.
|
||||
Copyright © 2021-2022 Collabora, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
<metrics id="TMIx">
|
||||
<category name="Job Manager" per_cpu="no">
|
||||
<event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles"/>
|
||||
<event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles"/>
|
||||
<event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs"/>
|
||||
<event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks"/>
|
||||
<event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles"/>
|
||||
<event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor reads cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles"/>
|
||||
<event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs"/>
|
||||
<event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks"/>
|
||||
<event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs"/>
|
||||
<event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks"/>
|
||||
<event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles"/>
|
||||
<event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles"/>
|
||||
</category>
|
||||
<category name="Tiler" per_cpu="no">
|
||||
<event offset="4" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles"/>
|
||||
<event offset="6" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives"/>
|
||||
<event offset="7" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives"/>
|
||||
<event offset="8" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives"/>
|
||||
<event offset="9" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="10" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="11" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives"/>
|
||||
<event offset="12" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives"/>
|
||||
<event offset="13" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives"/>
|
||||
<event offset="14" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives"/>
|
||||
<event offset="17" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats"/>
|
||||
<event offset="19" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats"/>
|
||||
<event offset="21" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles"/>
|
||||
<event offset="26" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="27" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="31" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles"/>
|
||||
<event offset="34" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="35" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="37" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="38" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles"/>
|
||||
<event offset="54" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles"/>
|
||||
<event offset="55" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles"/>
|
||||
</category>
|
||||
<category name="Memory System" per_cpu="no">
|
||||
<event offset="4" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests"/>
|
||||
<event offset="16" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests"/>
|
||||
<event offset="17" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles L2 cache read requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="18" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests"/>
|
||||
<event offset="19" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests"/>
|
||||
<event offset="21" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="25" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests"/>
|
||||
<event offset="26" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests"/>
|
||||
<event offset="27" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests"/>
|
||||
<event offset="28" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests"/>
|
||||
<event offset="29" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions"/>
|
||||
<event offset="30" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions"/>
|
||||
<event offset="31" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions"/>
|
||||
<event offset="32" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats"/>
|
||||
<event offset="33" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="34" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="35" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="36" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="37" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats"/>
|
||||
<event offset="38" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats"/>
|
||||
<event offset="39" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats"/>
|
||||
<event offset="40" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats"/>
|
||||
<event offset="41" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats"/>
|
||||
<event offset="42" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions"/>
|
||||
<event offset="43" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions"/>
|
||||
<event offset="44" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions"/>
|
||||
<event offset="45" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions"/>
|
||||
<event offset="46" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions"/>
|
||||
<event offset="47" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats"/>
|
||||
<event offset="48" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="49" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="50" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="51" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="52" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions"/>
|
||||
<event offset="53" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by external master is stalled." units="cycles"/>
|
||||
</category>
|
||||
<category name="Shader Core" per_cpu="no">
|
||||
<event offset="4" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles"/>
|
||||
<event offset="5" advanced="yes" counter="FRAG_PRIMITIVES" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives"/>
|
||||
<event offset="6" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives"/>
|
||||
<event offset="7" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles"/>
|
||||
<event offset="9" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps"/>
|
||||
<event offset="10" counter="FRAG_PARTIAL_WARPS" title="Core Warps" name="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps"/>
|
||||
<event offset="11" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads"/>
|
||||
<event offset="12" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads"/>
|
||||
<event offset="13" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads"/>
|
||||
<event offset="14" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads"/>
|
||||
<event offset="15" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads"/>
|
||||
<event offset="16" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads"/>
|
||||
<event offset="18" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles"/>
|
||||
<event offset="19" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles"/>
|
||||
<event offset="20" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads"/>
|
||||
<event offset="22" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks"/>
|
||||
<event offset="24" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps"/>
|
||||
<event offset="25" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles"/>
|
||||
<event offset="26" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles"/>
|
||||
<event offset="27" advanced="yes" counter="EXEC_ACTIVE" title="Core Cycles" name="Execution engine active" description="The number of cycles where the execution engine unit is processing at least one thread." units="cycles"/>
|
||||
<event offset="28" counter="EXEC_INSTR_COUNT" title="Core EE Instructions" name="Executed instructions" description="The number of instructions executed per warp." units="instructions"/>
|
||||
<event offset="29" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions"/>
|
||||
<event offset="30" advanced="yes" counter="EXEC_INSTR_STARVING" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where no new threads are available for execution." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="ARITH_INSTR_SINGLE_FMA" title="Core EE Instructions" name="Arithmetic instructions" description="The number of instructions where the workload is a single FMA pipe arithmetic operation." units="instructions"/>
|
||||
<event offset="32" advanced="yes" counter="ARITH_INSTR_DOUBLE" title="Core EE Instructions" name="Dual Arithmetic instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe arithmetic operation." units="instructions"/>
|
||||
<event offset="33" advanced="yes" counter="ARITH_INSTR_MSG" title="Core EE Instructions" name="Arithmetic + Message instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe message operation." units="instructions"/>
|
||||
<event offset="34" advanced="yes" counter="ARITH_INSTR_MSG_ONLY" title="Core EE Instructions" name="Message instructions" description="The number of instructions where the workload is a single ADD pipe message operation, with no FMA pipe operation." units="instructions"/>
|
||||
<event offset="35" counter="TEX_INSTR" title="Core Texture Requests" name="Texture requests" description="The number of thread-width texture operations processed." units="instructions"/>
|
||||
<event offset="36" counter="TEX_INSTR_MIPMAP" title="Core Texture Requests" name="Mipmapped texture request" description="The number of texture operations that act on a mipmapped texture." units="instructions"/>
|
||||
<event offset="37" counter="TEX_INSTR_COMPRESSED" title="Core Texture Requests" name="Compressed texture requests" description="The number of texture operations acting on a compressed texture." units="instructions"/>
|
||||
<event offset="38" counter="TEX_INSTR_3D" title="Core Texture Requests" name="3D texture requests" description="The number of texture operations acting on a 3D texture." units="instructions"/>
|
||||
<event offset="39" counter="TEX_INSTR_TRILINEAR" title="Core Texture Requests" name="Trilinear filtered requests" description="The number of texture operations using a trilinear texture filter." units="instructions"/>
|
||||
<event offset="40" counter="TEX_COORD_ISSUE" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles"/>
|
||||
<event offset="41" advanced="yes" counter="TEX_COORD_STALL" title="Core Texture Cycles" name="Coordinate stall cycles" description="The number of clock cycles where threads are stalled at the texel coordinate calculation stage." units="cycles"/>
|
||||
<event offset="42" advanced="yes" counter="TEX_STARVE_CACHE" title="Core Texture Cycles" name="Line fill stall cycles" description="The number of clock cycles where at least one thread is waiting for data from the texture cache, but no lookup is completed." units="cycles"/>
|
||||
<event offset="43" advanced="yes" counter="TEX_STARVE_FILTER" title="Core Texture Cycles" name="Partial data stall cycles" description="The number of clock cycles where at least one thread fetched some data from the texture cache, but no filtering operation is started." units="cycles"/>
|
||||
<event offset="44" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles"/>
|
||||
<event offset="45" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles"/>
|
||||
<event offset="46" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles"/>
|
||||
<event offset="47" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles"/>
|
||||
<event offset="48" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles"/>
|
||||
<event offset="49" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions"/>
|
||||
<event offset="50" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="51" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="52" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions"/>
|
||||
<event offset="53" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions"/>
|
||||
<event offset="54" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats"/>
|
||||
<event offset="55" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="56" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats"/>
|
||||
<event offset="57" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="58" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats"/>
|
||||
<event offset="59" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="60" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats"/>
|
||||
<event offset="61" counter="BEATS_WR_LSC" title="Core Writes" name="Load/store write beats" description="The number of write beats sent by the load/store unit." units="beats"/>
|
||||
<event offset="62" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats"/>
|
||||
<event offset="63" advanced="yes" counter="BEATS_WR_OTHER" title="Core Writes" name="Other write beats" description="The number of write beats sent by any unit that is not specifically identified." units="beats"/>
|
||||
</category>
|
||||
</metrics>
|
||||
|
|
@ -1,158 +0,0 @@
|
|||
<!--
|
||||
Copyright © 2017-2020 ARM Limited.
|
||||
Copyright © 2021-2022 Collabora, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
<metrics id="THEx">
|
||||
<category name="Job Manager" per_cpu="no">
|
||||
<event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles"/>
|
||||
<event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles"/>
|
||||
<event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs"/>
|
||||
<event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks"/>
|
||||
<event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles"/>
|
||||
<event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor read cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles"/>
|
||||
<event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs"/>
|
||||
<event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks"/>
|
||||
<event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs"/>
|
||||
<event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks"/>
|
||||
<event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles"/>
|
||||
<event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles"/>
|
||||
</category>
|
||||
<category name="Tiler" per_cpu="no">
|
||||
<event offset="4" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles"/>
|
||||
<event offset="6" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives"/>
|
||||
<event offset="7" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives"/>
|
||||
<event offset="8" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives"/>
|
||||
<event offset="9" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="10" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="11" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives"/>
|
||||
<event offset="12" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives"/>
|
||||
<event offset="13" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives"/>
|
||||
<event offset="14" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives"/>
|
||||
<event offset="17" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats"/>
|
||||
<event offset="19" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats"/>
|
||||
<event offset="21" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles"/>
|
||||
<event offset="26" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="27" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="31" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles"/>
|
||||
<event offset="34" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="35" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="37" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="38" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles"/>
|
||||
<event offset="54" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles"/>
|
||||
<event offset="55" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles"/>
|
||||
</category>
|
||||
<category name="Memory System" per_cpu="no">
|
||||
<event offset="4" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests"/>
|
||||
<event offset="16" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests"/>
|
||||
<event offset="17" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles where L2 cache read requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="18" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests"/>
|
||||
<event offset="19" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests"/>
|
||||
<event offset="21" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="25" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests"/>
|
||||
<event offset="26" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests"/>
|
||||
<event offset="27" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests"/>
|
||||
<event offset="28" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests"/>
|
||||
<event offset="29" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions"/>
|
||||
<event offset="30" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions"/>
|
||||
<event offset="31" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions"/>
|
||||
<event offset="32" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats"/>
|
||||
<event offset="33" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="34" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="35" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="36" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="37" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats"/>
|
||||
<event offset="38" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats"/>
|
||||
<event offset="39" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats"/>
|
||||
<event offset="40" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats"/>
|
||||
<event offset="41" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats"/>
|
||||
<event offset="42" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions"/>
|
||||
<event offset="43" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions"/>
|
||||
<event offset="44" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions"/>
|
||||
<event offset="45" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions"/>
|
||||
<event offset="46" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions"/>
|
||||
<event offset="47" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats"/>
|
||||
<event offset="48" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="49" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="50" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="51" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="52" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions"/>
|
||||
<event offset="53" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles"/>
|
||||
</category>
|
||||
<category name="Shader Core" per_cpu="no">
|
||||
<event offset="4" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles"/>
|
||||
<event offset="5" advanced="yes" counter="FRAG_PRIMITIVES" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives"/>
|
||||
<event offset="6" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives"/>
|
||||
<event offset="7" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles"/>
|
||||
<event offset="9" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps"/>
|
||||
<event offset="10" counter="FRAG_PARTIAL_WARPS" title="Core Warps" name="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps"/>
|
||||
<event offset="11" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads"/>
|
||||
<event offset="12" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads"/>
|
||||
<event offset="13" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads"/>
|
||||
<event offset="14" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads"/>
|
||||
<event offset="15" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads"/>
|
||||
<event offset="16" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads"/>
|
||||
<event offset="18" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles"/>
|
||||
<event offset="19" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles"/>
|
||||
<event offset="20" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads"/>
|
||||
<event offset="22" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks"/>
|
||||
<event offset="24" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps"/>
|
||||
<event offset="25" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles"/>
|
||||
<event offset="26" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles"/>
|
||||
<event offset="27" advanced="yes" counter="EXEC_ACTIVE" title="Core Cycles" name="Execution engine active" description="The number of cycles where the execution engine unit is processing at least one thread." units="cycles"/>
|
||||
<event offset="28" counter="EXEC_INSTR_COUNT" title="Core EE Instructions" name="Executed instructions" description="The number of instructions executed per warp." units="instructions"/>
|
||||
<event offset="29" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions"/>
|
||||
<event offset="30" advanced="yes" counter="EXEC_INSTR_STARVING" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where no new threads are available for execution." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="ARITH_INSTR_SINGLE_FMA" title="Core EE Instructions" name="Arithmetic instructions" description="The number of instructions where the workload is a single FMA pipe arithmetic operation." units="instructions"/>
|
||||
<event offset="32" advanced="yes" counter="ARITH_INSTR_DOUBLE" title="Core EE Instructions" name="Dual Arithmetic instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe arithmetic operation." units="instructions"/>
|
||||
<event offset="33" advanced="yes" counter="ARITH_INSTR_MSG" title="Core EE Instructions" name="Arithmetic + Message instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe message operation." units="instructions"/>
|
||||
<event offset="34" advanced="yes" counter="ARITH_INSTR_MSG_ONLY" title="Core EE Instructions" name="Message instructions" description="The number of instructions where the workload is a single ADD pipe message operation, with no FMA pipe operation." units="instructions"/>
|
||||
<event offset="35" counter="TEX_INSTR" title="Core Texture Requests" name="Texture requests" description="The number of thread-width texture operations processed." units="instructions"/>
|
||||
<event offset="36" counter="TEX_INSTR_MIPMAP" title="Core Texture Requests" name="Mipmapped texture request" description="The number of texture operations that act on a mipmapped texture." units="instructions"/>
|
||||
<event offset="37" counter="TEX_INSTR_COMPRESSED" title="Core Texture Requests" name="Compressed texture requests" description="The number of texture operations acting on a compressed texture." units="instructions"/>
|
||||
<event offset="38" counter="TEX_INSTR_3D" title="Core Texture Requests" name="3D texture requests" description="The number of texture operations acting on a 3D texture." units="instructions"/>
|
||||
<event offset="39" counter="TEX_INSTR_TRILINEAR" title="Core Texture Requests" name="Trilinear filtered requests" description="The number of texture operations using a trilinear texture filter." units="instructions"/>
|
||||
<event offset="40" counter="TEX_COORD_ISSUE" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles"/>
|
||||
<event offset="41" advanced="yes" counter="TEX_COORD_STALL" title="Core Texture Cycles" name="Coordinate stall cycles" description="The number of clock cycles where threads are stalled at the texel coordinate calculation stage." units="cycles"/>
|
||||
<event offset="42" advanced="yes" counter="TEX_STARVE_CACHE" title="Core Texture Cycles" name="Line fill stall cycles" description="The number of clock cycles where at least one thread is waiting for data from the texture cache, but no lookup is completed." units="cycles"/>
|
||||
<event offset="43" advanced="yes" counter="TEX_STARVE_FILTER" title="Core Texture Cycles" name="Partial data stall cycles" description="The number of clock cycles where at least one thread fetched some data from the texture cache, but no filtering operation is started." units="cycles"/>
|
||||
<event offset="44" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles"/>
|
||||
<event offset="45" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles"/>
|
||||
<event offset="46" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles"/>
|
||||
<event offset="47" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles"/>
|
||||
<event offset="48" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles"/>
|
||||
<event offset="49" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions"/>
|
||||
<event offset="50" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="51" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="52" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions"/>
|
||||
<event offset="53" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions"/>
|
||||
<event offset="54" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats"/>
|
||||
<event offset="55" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="56" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats"/>
|
||||
<event offset="57" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="58" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats"/>
|
||||
<event offset="59" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="60" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats"/>
|
||||
<event offset="61" counter="BEATS_WR_LSC" title="Core Writes" name="Load/store write beats" description="The number of write beats sent by the load/store unit." units="beats"/>
|
||||
<event offset="62" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats"/>
|
||||
<event offset="63" advanced="yes" counter="BEATS_WR_OTHER" title="Core Writes" name="Other write beats" description="The number of write beats sent by any unit that is not specifically identified." units="beats"/>
|
||||
</category>
|
||||
</metrics>
|
||||
|
|
@ -1,160 +0,0 @@
|
|||
<!--
|
||||
Copyright © 2017-2020 ARM Limited.
|
||||
Copyright © 2021-2022 Collabora, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
<metrics id="TNOx">
|
||||
<category name="Job Manager" per_cpu="no">
|
||||
<event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles"/>
|
||||
<event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles"/>
|
||||
<event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs"/>
|
||||
<event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks"/>
|
||||
<event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles"/>
|
||||
<event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor reads cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles"/>
|
||||
<event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs"/>
|
||||
<event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks"/>
|
||||
<event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs"/>
|
||||
<event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks"/>
|
||||
<event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles"/>
|
||||
<event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles"/>
|
||||
</category>
|
||||
<category name="Tiler" per_cpu="no">
|
||||
<event offset="4" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles"/>
|
||||
<event offset="6" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives"/>
|
||||
<event offset="7" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives"/>
|
||||
<event offset="8" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives"/>
|
||||
<event offset="9" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="10" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="11" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives"/>
|
||||
<event offset="12" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives"/>
|
||||
<event offset="13" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives"/>
|
||||
<event offset="14" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives"/>
|
||||
<event offset="17" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats"/>
|
||||
<event offset="19" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats"/>
|
||||
<event offset="21" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles"/>
|
||||
<event offset="26" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="27" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="31" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles"/>
|
||||
<event offset="34" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="35" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="37" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="38" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles"/>
|
||||
<event offset="54" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer cannot send data because it has no available write IDs." units="cycles"/>
|
||||
<event offset="55" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer cannot send data because the bus is not ready." units="cycles"/>
|
||||
</category>
|
||||
<category name="Memory System" per_cpu="no">
|
||||
<event offset="4" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests"/>
|
||||
<event offset="16" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests"/>
|
||||
<event offset="17" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles where L2 cache read requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="18" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests"/>
|
||||
<event offset="19" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests"/>
|
||||
<event offset="21" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="25" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests"/>
|
||||
<event offset="26" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests"/>
|
||||
<event offset="27" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests"/>
|
||||
<event offset="28" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests"/>
|
||||
<event offset="29" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions"/>
|
||||
<event offset="30" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions"/>
|
||||
<event offset="31" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions"/>
|
||||
<event offset="32" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats"/>
|
||||
<event offset="33" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="34" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="35" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="36" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="37" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats"/>
|
||||
<event offset="38" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats"/>
|
||||
<event offset="39" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats"/>
|
||||
<event offset="40" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats"/>
|
||||
<event offset="41" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats"/>
|
||||
<event offset="42" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions"/>
|
||||
<event offset="43" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions"/>
|
||||
<event offset="44" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions"/>
|
||||
<event offset="45" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions"/>
|
||||
<event offset="46" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions"/>
|
||||
<event offset="47" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats"/>
|
||||
<event offset="48" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="49" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="50" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="51" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="52" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions"/>
|
||||
<event offset="53" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles"/>
|
||||
</category>
|
||||
<category name="Shader Core" per_cpu="no">
|
||||
<event offset="4" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles"/>
|
||||
<event offset="5" advanced="yes" counter="FRAG_PRIMITIVES" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives"/>
|
||||
<event offset="6" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives"/>
|
||||
<event offset="7" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles"/>
|
||||
<event offset="9" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps"/>
|
||||
<event offset="10" counter="FRAG_PARTIAL_WARPS" title="Core Warps" name="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps"/>
|
||||
<event offset="11" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads"/>
|
||||
<event offset="12" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads"/>
|
||||
<event offset="13" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads"/>
|
||||
<event offset="14" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads"/>
|
||||
<event offset="15" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads"/>
|
||||
<event offset="16" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads"/>
|
||||
<event offset="17" counter="WARP_REG_SIZE_64" title="Core Warps" name="All register warps" description="The number of warps that require more than 32 registers." units="warps"/>
|
||||
<event offset="18" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles"/>
|
||||
<event offset="19" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles"/>
|
||||
<event offset="20" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads"/>
|
||||
<event offset="21" counter="FULL_QUAD_WARPS" title="Core Warps" name="Full quad warps" description="The number of warps that are fully populated with quads." units="warps"/>
|
||||
<event offset="22" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks"/>
|
||||
<event offset="24" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps"/>
|
||||
<event offset="25" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles"/>
|
||||
<event offset="26" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles"/>
|
||||
<event offset="27" advanced="yes" counter="EXEC_ACTIVE" title="Core Cycles" name="Execution engine active" description="The number of cycles where the execution engine unit is processing at least one thread." units="cycles"/>
|
||||
<event offset="28" counter="EXEC_INSTR_COUNT" title="Core EE Instructions" name="Executed instructions" description="The number of instructions executed per warp." units="instructions"/>
|
||||
<event offset="29" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions"/>
|
||||
<event offset="30" advanced="yes" counter="EXEC_INSTR_STARVING" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where no new threads are available for execution." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="ARITH_INSTR_SINGLE_FMA" title="Core EE Instructions" name="Arithmetic instructions" description="The number of instructions where the workload is a single FMA pipe arithmetic operation." units="instructions"/>
|
||||
<event offset="32" advanced="yes" counter="ARITH_INSTR_DOUBLE" title="Core EE Instructions" name="Dual Arithmetic instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe arithmetic operation." units="instructions"/>
|
||||
<event offset="33" advanced="yes" counter="ARITH_INSTR_MSG" title="Core EE Instructions" name="Arithmetic + Message instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe message operation." units="instructions"/>
|
||||
<event offset="34" advanced="yes" counter="ARITH_INSTR_MSG_ONLY" title="Core EE Instructions" name="Message instructions" description="The number of instructions where the workload is a single ADD pipe message operation, with no FMA pipe operation." units="instructions"/>
|
||||
<event offset="35" counter="TEX_MSGI_NUM_QUADS" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads"/>
|
||||
<event offset="36" counter="TEX_DFCH_NUM_PASSES" title="Core Texture Quads" name="Texture issues" description="The number of quad-width filtering passes." units="issues"/>
|
||||
<event offset="37" counter="TEX_DFCH_NUM_PASSES_MISS" title="Core Texture Quads" name="Descriptor misses" description="The number of quad-width filtering passes that miss in the resource or sampler descriptor cache." units="requests"/>
|
||||
<event offset="38" counter="TEX_DFCH_NUM_PASSES_MIP_MAP" title="Core Texture Quads" name="Mipmapped texture issues" description="The number of quad-width filtering passes that use a mipmapped texture." units="issues"/>
|
||||
<event offset="39" counter="TEX_TIDX_NUM_SPLIT_MIP_MAP" title="Core Texture Quads" name="Trilinear filtered issues" description="The number of quad-width filtering passes that use a trilinear filter." units="issues"/>
|
||||
<event offset="40" counter="TEX_TFCH_NUM_LINES_FETCHED" title="Core Texture Line Fetches" name="Line fetches" description="The number of texture line fetches from the L2 cache." units="issues"/>
|
||||
<event offset="41" counter="TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED" title="Core Texture Line Fetches" name="Compressed line fetches" description="The number of texture line fetches from the L2 cache that are block compressed textures." units="issues"/>
|
||||
<event offset="42" counter="TEX_TFCH_NUM_OPERATIONS" title="Core Texture Cycles" name="Cache lookups" description="The number of texture cache lookup cycles." units="requests"/>
|
||||
<event offset="43" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles"/>
|
||||
<event offset="44" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles"/>
|
||||
<event offset="45" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles"/>
|
||||
<event offset="46" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles"/>
|
||||
<event offset="47" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles"/>
|
||||
<event offset="48" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles"/>
|
||||
<event offset="49" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions"/>
|
||||
<event offset="50" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="51" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="52" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions"/>
|
||||
<event offset="53" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions"/>
|
||||
<event offset="54" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats"/>
|
||||
<event offset="55" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="56" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats"/>
|
||||
<event offset="57" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="58" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats"/>
|
||||
<event offset="59" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="60" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats"/>
|
||||
<event offset="61" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats"/>
|
||||
<event offset="62" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats"/>
|
||||
<event offset="63" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats"/>
|
||||
</category>
|
||||
</metrics>
|
||||
|
|
@ -1,160 +0,0 @@
|
|||
<!--
|
||||
Copyright © 2017-2020 ARM Limited.
|
||||
Copyright © 2021-2022 Collabora, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
<metrics id="TTRx">
|
||||
<category name="Job Manager" per_cpu="no">
|
||||
<event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles"/>
|
||||
<event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles"/>
|
||||
<event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs"/>
|
||||
<event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks"/>
|
||||
<event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles"/>
|
||||
<event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor read cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles"/>
|
||||
<event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs"/>
|
||||
<event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks"/>
|
||||
<event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs"/>
|
||||
<event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks"/>
|
||||
<event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles"/>
|
||||
<event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles"/>
|
||||
</category>
|
||||
<category name="Tiler" per_cpu="no">
|
||||
<event offset="4" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles"/>
|
||||
<event offset="6" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives"/>
|
||||
<event offset="7" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives"/>
|
||||
<event offset="8" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives"/>
|
||||
<event offset="9" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="10" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="11" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives"/>
|
||||
<event offset="12" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives"/>
|
||||
<event offset="13" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives"/>
|
||||
<event offset="14" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives"/>
|
||||
<event offset="17" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats"/>
|
||||
<event offset="19" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats"/>
|
||||
<event offset="21" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles"/>
|
||||
<event offset="26" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="27" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="31" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles"/>
|
||||
<event offset="34" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="35" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="37" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="38" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles"/>
|
||||
<event offset="54" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles"/>
|
||||
<event offset="55" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles"/>
|
||||
</category>
|
||||
<category name="Memory System" per_cpu="no">
|
||||
<event offset="4" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests"/>
|
||||
<event offset="16" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests"/>
|
||||
<event offset="17" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles L2 cache read requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="18" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests"/>
|
||||
<event offset="19" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests"/>
|
||||
<event offset="21" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="25" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests"/>
|
||||
<event offset="26" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests"/>
|
||||
<event offset="27" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests"/>
|
||||
<event offset="28" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests"/>
|
||||
<event offset="29" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions"/>
|
||||
<event offset="30" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions"/>
|
||||
<event offset="31" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions"/>
|
||||
<event offset="32" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats"/>
|
||||
<event offset="33" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="34" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="35" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="36" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="37" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats"/>
|
||||
<event offset="38" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats"/>
|
||||
<event offset="39" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats"/>
|
||||
<event offset="40" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats"/>
|
||||
<event offset="41" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats"/>
|
||||
<event offset="42" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions"/>
|
||||
<event offset="43" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions"/>
|
||||
<event offset="44" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions"/>
|
||||
<event offset="45" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions"/>
|
||||
<event offset="46" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions"/>
|
||||
<event offset="47" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats"/>
|
||||
<event offset="48" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="49" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="50" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="51" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="52" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions"/>
|
||||
<event offset="53" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by external master is stalled." units="cycles"/>
|
||||
</category>
|
||||
<category name="Shader Core" per_cpu="no">
|
||||
<event offset="4" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles"/>
|
||||
<event offset="5" advanced="yes" counter="FRAG_PRIMITIVES_OUT" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives"/>
|
||||
<event offset="6" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives"/>
|
||||
<event offset="7" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles"/>
|
||||
<event offset="9" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps"/>
|
||||
<event offset="10" counter="FRAG_PARTIAL_QUADS_RAST" title="Core Quads" name="Partial rasterized quads" description="The number of partially-rasterized fragment quads created." units="quads"/>
|
||||
<event offset="11" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads"/>
|
||||
<event offset="12" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads"/>
|
||||
<event offset="13" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads"/>
|
||||
<event offset="14" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads"/>
|
||||
<event offset="15" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads"/>
|
||||
<event offset="16" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads"/>
|
||||
<event offset="17" counter="WARP_REG_SIZE_64" title="Core Warps" name="All register warps" description="The number of warps that require more than 32 registers." units="warps"/>
|
||||
<event offset="18" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles"/>
|
||||
<event offset="19" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles"/>
|
||||
<event offset="20" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads"/>
|
||||
<event offset="21" counter="FULL_QUAD_WARPS" title="Core Warps" name="Full quad warps" description="The number of warps that are fully populated with quads." units="warps"/>
|
||||
<event offset="22" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks"/>
|
||||
<event offset="24" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps"/>
|
||||
<event offset="25" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles"/>
|
||||
<event offset="26" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles"/>
|
||||
<event offset="27" counter="EXEC_INSTR_FMA" title="Core PU Instructions" name="FMA instructions" description="The number of instructions issued to the FMA pipe." units="instructions"/>
|
||||
<event offset="28" counter="EXEC_INSTR_CVT" title="Core PU Instructions" name="CVT instructions" description="The number of instructions issued to the CVT pipe." units="instructions"/>
|
||||
<event offset="29" counter="EXEC_INSTR_SFU" title="Core PU Instructions" name="SFU instructions" description="The number of instructions issued to the SFU pipe." units="instructions"/>
|
||||
<event offset="30" counter="EXEC_INSTR_MSG" title="Core PU Instructions" name="Message instructions" description="The number of instructions issued to the MSG pipe." units="instructions"/>
|
||||
<event offset="31" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions"/>
|
||||
<event offset="32" advanced="yes" counter="EXEC_ICACHE_MISS" title="Core PU Instructions" name="Instruction cache misses" description="The number of instruction cache misses." units="requests"/>
|
||||
<event offset="33" advanced="yes" counter="EXEC_STARVE_ARITH" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where the processing unit is starved of work." units="cycles"/>
|
||||
<event offset="34" counter="CALL_BLEND_SHADER" title="Core PU Instructions" name="Blend shader calls" description="The number of blend shader invocations executed." units="instructions"/>
|
||||
<event offset="35" counter="TEX_MSGI_NUM_FLITS" title="Texture Bus" name="Input beats" description="The number of texture request message data beats." units="beats"/>
|
||||
<event offset="36" counter="TEX_DFCH_CLK_STALLED" title="Core Texture Stalls" name="Descriptor stall cycles" description="The number of cycles where a quad is stalled on texture descriptor fetch." units="cycles"/>
|
||||
<event offset="37" counter="TEX_TFCH_CLK_STALLED" title="Core Texture Stalls" name="Fetch queue stall cycles" description="The number of cycles where a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles"/>
|
||||
<event offset="38" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" title="Core Texture Stalls" name="Filtering unit stall cycles" description="The number of cycles where the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles"/>
|
||||
<event offset="39" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles"/>
|
||||
<event offset="40" counter="TEX_FILT_NUM_FXR_OPERATIONS" title="Core Texture Cycles" name="4x bilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement nearest or bilinear filtering." units="cycles"/>
|
||||
<event offset="41" counter="TEX_FILT_NUM_FST_OPERATIONS" title="Core Texture Cycles" name="2x trilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement trilinear filtering." units="cycles"/>
|
||||
<event offset="42" counter="TEX_MSGO_NUM_MSG" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads"/>
|
||||
<event offset="43" counter="TEX_MSGO_NUM_FLITS" title="Texture Bus" name="Output beats" description="The number of texture response message data beats." units="beats"/>
|
||||
<event offset="44" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles"/>
|
||||
<event offset="45" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles"/>
|
||||
<event offset="46" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles"/>
|
||||
<event offset="47" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles"/>
|
||||
<event offset="48" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles"/>
|
||||
<event offset="49" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions"/>
|
||||
<event offset="50" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="51" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="52" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions"/>
|
||||
<event offset="53" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions"/>
|
||||
<event offset="54" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats"/>
|
||||
<event offset="55" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="56" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats"/>
|
||||
<event offset="57" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="58" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats"/>
|
||||
<event offset="59" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="60" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats"/>
|
||||
<event offset="61" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats"/>
|
||||
<event offset="62" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats"/>
|
||||
<event offset="63" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats"/>
|
||||
</category>
|
||||
</metrics>
|
||||
|
|
@ -1,160 +0,0 @@
|
|||
<!--
|
||||
Copyright © 2017-2020 ARM Limited.
|
||||
Copyright © 2021-2022 Collabora, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
<metrics id="TBOx">
|
||||
<category name="Job Manager" per_cpu="no">
|
||||
<event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles"/>
|
||||
<event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles"/>
|
||||
<event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs"/>
|
||||
<event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks"/>
|
||||
<event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles"/>
|
||||
<event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor read cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles"/>
|
||||
<event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs"/>
|
||||
<event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks"/>
|
||||
<event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs"/>
|
||||
<event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks"/>
|
||||
<event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles"/>
|
||||
<event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles"/>
|
||||
<event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles"/>
|
||||
<event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles"/>
|
||||
<event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles"/>
|
||||
</category>
|
||||
<category name="Tiler" per_cpu="no">
|
||||
<event offset="4" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles"/>
|
||||
<event offset="6" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives"/>
|
||||
<event offset="7" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives"/>
|
||||
<event offset="8" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives"/>
|
||||
<event offset="9" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="10" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives"/>
|
||||
<event offset="11" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives"/>
|
||||
<event offset="12" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives"/>
|
||||
<event offset="13" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives"/>
|
||||
<event offset="14" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives"/>
|
||||
<event offset="17" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats"/>
|
||||
<event offset="19" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats"/>
|
||||
<event offset="21" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles"/>
|
||||
<event offset="26" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="27" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="31" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles"/>
|
||||
<event offset="34" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests"/>
|
||||
<event offset="35" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests"/>
|
||||
<event offset="37" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests"/>
|
||||
<event offset="38" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles"/>
|
||||
<event offset="54" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer cannot send data because it has no available write IDs." units="cycles"/>
|
||||
<event offset="55" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer cannot send data because the bus is not ready." units="cycles"/>
|
||||
</category>
|
||||
<category name="Memory System" per_cpu="no">
|
||||
<event offset="4" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests"/>
|
||||
<event offset="16" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests"/>
|
||||
<event offset="17" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles where L2 cache read requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="18" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests"/>
|
||||
<event offset="19" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="20" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests"/>
|
||||
<event offset="21" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles"/>
|
||||
<event offset="22" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="23" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles"/>
|
||||
<event offset="24" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests"/>
|
||||
<event offset="25" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests"/>
|
||||
<event offset="26" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests"/>
|
||||
<event offset="27" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests"/>
|
||||
<event offset="28" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests"/>
|
||||
<event offset="29" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions"/>
|
||||
<event offset="30" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions"/>
|
||||
<event offset="31" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions"/>
|
||||
<event offset="32" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats"/>
|
||||
<event offset="33" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="34" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="35" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="36" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="37" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats"/>
|
||||
<event offset="38" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats"/>
|
||||
<event offset="39" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats"/>
|
||||
<event offset="40" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats"/>
|
||||
<event offset="41" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats"/>
|
||||
<event offset="42" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions"/>
|
||||
<event offset="43" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions"/>
|
||||
<event offset="44" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions"/>
|
||||
<event offset="45" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions"/>
|
||||
<event offset="46" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions"/>
|
||||
<event offset="47" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats"/>
|
||||
<event offset="48" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles"/>
|
||||
<event offset="49" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
|
||||
<event offset="50" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
|
||||
<event offset="51" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
|
||||
<event offset="52" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions"/>
|
||||
<event offset="53" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles"/>
|
||||
</category>
|
||||
<category name="Shader Core" per_cpu="no">
|
||||
<event offset="4" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles"/>
|
||||
<event offset="5" advanced="yes" counter="FRAG_PRIMITIVES_OUT" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives"/>
|
||||
<event offset="6" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives"/>
|
||||
<event offset="7" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles"/>
|
||||
<event offset="9" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps"/>
|
||||
<event offset="10" counter="FRAG_PARTIAL_QUADS_RAST" title="Core Quads" name="Partial rasterized quads" description="The number of partially-rasterized fragment quads created." units="quads"/>
|
||||
<event offset="11" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads"/>
|
||||
<event offset="12" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads"/>
|
||||
<event offset="13" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads"/>
|
||||
<event offset="14" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads"/>
|
||||
<event offset="15" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads"/>
|
||||
<event offset="16" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads"/>
|
||||
<event offset="17" counter="WARP_REG_SIZE_64" title="Core Warps" name="All register warps" description="The number of warps that require more than 32 registers." units="warps"/>
|
||||
<event offset="18" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles"/>
|
||||
<event offset="19" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles"/>
|
||||
<event offset="20" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads"/>
|
||||
<event offset="21" counter="FULL_QUAD_WARPS" title="Core Warps" name="Full quad warps" description="The number of warps that are fully populated with quads." units="warps"/>
|
||||
<event offset="22" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles"/>
|
||||
<event offset="23" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks"/>
|
||||
<event offset="24" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps"/>
|
||||
<event offset="25" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles"/>
|
||||
<event offset="26" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles"/>
|
||||
<event offset="27" counter="EXEC_INSTR_FMA" title="Core PU Instructions" name="FMA instructions" description="The number of instructions issued to the FMA pipe." units="instructions"/>
|
||||
<event offset="28" counter="EXEC_INSTR_CVT" title="Core PU Instructions" name="CVT instructions" description="The number of instructions issued to the CVT pipe." units="instructions"/>
|
||||
<event offset="29" counter="EXEC_INSTR_SFU" title="Core PU Instructions" name="SFU instructions" description="The number of instructions issued to the SFU pipe." units="instructions"/>
|
||||
<event offset="30" counter="EXEC_INSTR_MSG" title="Core PU Instructions" name="Message instructions" description="The number of instructions issued to the MSG pipe." units="instructions"/>
|
||||
<event offset="31" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions"/>
|
||||
<event offset="32" advanced="yes" counter="EXEC_ICACHE_MISS" title="Core PU Instructions" name="Instruction cache misses" description="The number of instruction cache misses." units="requests"/>
|
||||
<event offset="33" advanced="yes" counter="EXEC_STARVE_ARITH" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where the processing unit is starved of work." units="cycles"/>
|
||||
<event offset="34" counter="CALL_BLEND_SHADER" title="Core PU Instructions" name="Blend shader calls" description="The number of blend shader invocations executed." units="instructions"/>
|
||||
<event offset="35" counter="TEX_MSGI_NUM_FLITS" title="Texture Bus" name="Input beats" description="The number of texture request message data beats." units="beats"/>
|
||||
<event offset="36" counter="TEX_DFCH_CLK_STALLED" title="Core Texture Stalls" name="Descriptor stall cycles" description="The number of cycles where a quad is stalled on texture descriptor fetch." units="cycles"/>
|
||||
<event offset="37" counter="TEX_TFCH_CLK_STALLED" title="Core Texture Stalls" name="Fetch queue stall cycles" description="The number of cycles where a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles"/>
|
||||
<event offset="38" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" title="Core Texture Stalls" name="Filtering unit stall cycles" description="The number of cycles where the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles"/>
|
||||
<event offset="39" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles"/>
|
||||
<event offset="40" counter="TEX_FILT_NUM_FXR_OPERATIONS" title="Core Texture Cycles" name="4x bilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement nearest or bilinear filtering." units="cycles"/>
|
||||
<event offset="41" counter="TEX_FILT_NUM_FST_OPERATIONS" title="Core Texture Cycles" name="2x trilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement trilinear filtering." units="cycles"/>
|
||||
<event offset="42" counter="TEX_MSGO_NUM_MSG" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads"/>
|
||||
<event offset="43" counter="TEX_MSGO_NUM_FLITS" title="Texture Bus" name="Output beats" description="The number of texture response message data beats." units="beats"/>
|
||||
<event offset="44" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles"/>
|
||||
<event offset="45" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles"/>
|
||||
<event offset="46" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles"/>
|
||||
<event offset="47" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles"/>
|
||||
<event offset="48" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles"/>
|
||||
<event offset="49" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions"/>
|
||||
<event offset="50" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="51" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles"/>
|
||||
<event offset="52" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions"/>
|
||||
<event offset="53" counter="SHADER_CORE_ACTIVE" title="Core Cycles" name="Any active" description="The number of cycles where the shader core is processing either a non-fragment workload or a fragment workload." units="cycles"/>
|
||||
<event offset="54" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats"/>
|
||||
<event offset="55" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="56" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats"/>
|
||||
<event offset="57" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="58" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats"/>
|
||||
<event offset="59" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats"/>
|
||||
<event offset="60" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats"/>
|
||||
<event offset="61" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats"/>
|
||||
<event offset="62" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats"/>
|
||||
<event offset="63" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats"/>
|
||||
</category>
|
||||
</metrics>
|
||||
345
src/panfrost/perf/generated/G1.xml
Normal file
345
src/panfrost/perf/generated/G1.xml
Normal file
|
|
@ -0,0 +1,345 @@
|
|||
<!--
|
||||
Copyright (c) 2026 Arm, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
Generated from libGPUCounters @ 4ea7c0127fe2942a00e4a1123bb62c625a401f93.
|
||||
https://github.com/ARM-software/libGPUCounters
|
||||
which is:
|
||||
Copyright (c) 2023-2025 Arm Limited
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
|
||||
<metrics id="G1">
|
||||
<category name="GPU Front-end">
|
||||
<event name="MaliBinningQueueActiveCy" title="Binning phase queue active cycles" description="The number of cycles that the binning phase queue is processing work. The binning phase includes position shading, culling, and binning." units="cycles" equation="(ITER_TILER_ACTIVE) - (ITER_TILER_READY_BLOCKED)" />
|
||||
<event name="MaliBinningQueueAssignStallCy" title="Binning phase queue endpoint stall cycles" description="The number of cycles the binning phase queue is waiting for endpoints to be assigned." units="cycles" counter="ITER_TILER_READY_BLOCKED" offset="70" />
|
||||
<event name="MaliBinningQueueIRQActiveCy" title="Binning phase queue interrupt pending cycles" description="The number of cycles that the binning phase queue IRQ was pending." units="cycles" counter="ITER_TILER_IRQ_ACTIVE" offset="68" />
|
||||
<event name="MaliBinningQueueJob" title="Binning phase jobs" description="The number of binning phase jobs processed." units="jobs" counter="ITER_TILER_JOB_COMPLETED" offset="65" />
|
||||
<event name="MaliBinningQueueTask" title="Binning phase tasks" description="The number of binning phase tasks processed." units="tasks" counter="ITER_TILER_IDVS_TASK_COMPLETED" offset="66" />
|
||||
<event name="MaliBinningQueueUtil" title="Binning phase queue utilization" description="The binning phase queue utilization compared against the GPU active cycles. The binning phase includes position shading, culling, and binning." units="percent" equation="(((ITER_TILER_ACTIVE) - (ITER_TILER_READY_BLOCKED)) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliBinningQueuedCy" title="Binning phase work queued cycles" description="The number of cycles that the binning phase queue has work queued. The binning phase includes position shading, culling, and binning." units="cycles" counter="ITER_TILER_ACTIVE" offset="64" />
|
||||
<event name="MaliCS0WaitStallCy" title="Command stream 0 wait stall cycles" description="The number of cycles that command stream interface 0 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF0_WAIT_BLOCKED" offset="84" />
|
||||
<event name="MaliCS1WaitStallCy" title="Command stream 1 wait stall cycles" description="The number of cycles that command stream interface 1 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF1_WAIT_BLOCKED" offset="90" />
|
||||
<event name="MaliCS2WaitStallCy" title="Command stream 2 wait stall cycles" description="The number of cycles that command stream interface 2 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF2_WAIT_BLOCKED" offset="96" />
|
||||
<event name="MaliCS3WaitStallCy" title="Command stream 3 wait stall cycles" description="The number of cycles that command stream interface 3 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF3_WAIT_BLOCKED" offset="102" />
|
||||
<event name="MaliCS4WaitStallCy" title="Command stream 4 wait stall cycles" description="The number of cycles that command stream interface 4 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF4_WAIT_BLOCKED" offset="108" />
|
||||
<event name="MaliCS5WaitStallCy" title="Command stream 5 wait stall cycles" description="The number of cycles that command stream interface 5 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF5_WAIT_BLOCKED" offset="114" />
|
||||
<event name="MaliCSDoorbellIRQCy" title="Command stream doorbell interrupt pending cycles" description="The number of cycles that command stream doorbell has an IRQ pending." units="cycles" counter="DOORBELL_IRQ_ACTIVE" offset="14" />
|
||||
<event name="MaliCSFCEUActiveCy" title="Command execution unit active cycles" description="The number of cycles that the CEU is processing commands." units="cycles" counter="CEU_ACTIVE" offset="16" />
|
||||
<event name="MaliCSFCEUUtil" title="Command execution unit utilization" description="The CSF command execution unit utilization compared against the GPU active cycles." units="percent" equation="((CEU_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCSFCS0ActiveCy" title="Command stream 0 active cycles" description="The number of cycles that command stream interface 0 contained an enabled command stream." units="cycles" counter="CSHWIF0_ENABLED" offset="80" />
|
||||
<event name="MaliCSFCS1ActiveCy" title="Command stream 1 active cycles" description="The number of cycles that command stream interface 1 contained an enabled command stream." units="cycles" counter="CSHWIF1_ENABLED" offset="86" />
|
||||
<event name="MaliCSFCS2ActiveCy" title="Command stream 2 active cycles" description="The number of cycles that command stream interface 2 contained an enabled command stream." units="cycles" counter="CSHWIF2_ENABLED" offset="92" />
|
||||
<event name="MaliCSFCS3ActiveCy" title="Command stream 3 active cycles" description="The number of cycles that command stream interface 3 contained an enabled command stream." units="cycles" counter="CSHWIF3_ENABLED" offset="98" />
|
||||
<event name="MaliCSFCS4ActiveCy" title="Command stream 4 active cycles" description="The number of cycles that command stream interface 4 contained an enabled command stream." units="cycles" counter="CSHWIF4_ENABLED" offset="104" />
|
||||
<event name="MaliCSFCS5ActiveCy" title="Command stream 5 active cycles" description="The number of cycles that command stream interface 5 contained an enabled command stream." units="cycles" counter="CSHWIF5_ENABLED" offset="110" />
|
||||
<event name="MaliCSFLSUActiveCy" title="Command load/store unit active cycles" description="The number of cycles that the load-store unit is processing commands." units="cycles" counter="LSU_ACTIVE" offset="21" />
|
||||
<event name="MaliCSFLSUUtil" title="Command load/store unit utilization" description="The CSF load/store unit utilization compared against the GPU active cycles." units="percent" equation="((LSU_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCSFMCUActiveCy" title="MCU active cycles" description="The number of cycles when the CSF front-end MCU is actively processing." units="cycles" counter="MCU_ACTIVE" offset="5" />
|
||||
<event name="MaliCSFMCUUtil" title="Microcontroller utilization" description="The CSF MCU utilization compared against the GPU active cycles." units="percent" equation="((MCU_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCompQueueActiveCy" title="Compute queue active cycles" description="The number of cycles that the compute queue is processing work." units="cycles" equation="(ITER_COMP_ACTIVE) - (ITER_COMP_READY_BLOCKED)" />
|
||||
<event name="MaliCompQueueAssignStallCy" title="Compute queue endpoint stall cycles" description="The number of cycles the compute queue is waiting for endpoints to be assigned." units="cycles" counter="ITER_COMP_READY_BLOCKED" offset="38" />
|
||||
<event name="MaliCompQueueDrainStallCy" title="Compute queue endpoint drain stall cycles" description="The number of cycles the compute queue is waiting for endpoints to drain." units="cycles" counter="ITER_COMP_EP_DRAIN" offset="39" />
|
||||
<event name="MaliCompQueueIRQActiveCy" title="Compute queue interrupt pending cycles" description="The number of cycles that the compute queue IRQ was pending." units="cycles" counter="ITER_COMP_IRQ_ACTIVE" offset="36" />
|
||||
<event name="MaliCompQueueJob" title="Compute jobs" description="The number of compute jobs processed." units="jobs" counter="ITER_COMP_JOB_COMPLETED" offset="33" />
|
||||
<event name="MaliCompQueueTask" title="Compute tasks" description="The number of compute tasks processed." units="tasks" counter="ITER_COMP_TASK_COMPLETED" offset="34" />
|
||||
<event name="MaliCompQueueUtil" title="Compute queue utilization" description="The compute queue utilization compared against the GPU active cycles." units="percent" equation="(((ITER_COMP_ACTIVE) - (ITER_COMP_READY_BLOCKED)) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCompQueuedCy" title="Compute work queued cycles" description="The number of cycles that the compute queue has work queued." units="cycles" counter="ITER_COMP_ACTIVE" offset="32" />
|
||||
<event name="MaliGPUActiveCy" title="GPU active cycles" description="The number of cycles when the GPU has a workload of any type queued for processing." units="cycles" counter="GPU_ACTIVE" offset="4" />
|
||||
<event name="MaliGPUAnyQueueActiveCy" title="Any queue active cycles" description="The number of cycles when any GPU queue is active." units="cycles" counter="GPU_ITER_ACTIVE" offset="6" />
|
||||
<event name="MaliGPUIRQ" title="GPU interrupts" description="The number of interrupts raised by the GPU to the CPU." units="interrupts" counter="GPU_IRQ_COUNT" offset="11" />
|
||||
<event name="MaliGPUIRQActiveCy" title="GPU interrupt pending cycles" description="The number of cycles when the GPU has a pending interrupt." units="cycles" counter="IRQ_ACTIVE" offset="10" />
|
||||
<event name="MaliGPUIRQUtil" title="Interrupt pending utilization" description="The IRQ pending utilization compared against the GPU active cycles." units="percent" equation="((IRQ_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliMainQueueActiveCy" title="Main phase queue active cycles" description="The number of cycles that the main phase queue is processing work. The main phase includes deferred vertex shading and all fragment shading." units="cycles" equation="(ITER_FRAG_ACTIVE) - (ITER_FRAG_READY_BLOCKED)" />
|
||||
<event name="MaliMainQueueAssignStallCy" title="Main phase queue endpoint stall cycles" description="The number of cycles the main phase queue is waiting for endpoints to be assigned." units="cycles" counter="ITER_FRAG_READY_BLOCKED" offset="54" />
|
||||
<event name="MaliMainQueueIRQActiveCy" title="Main phase queue interrupt pending cycles" description="The number of cycles that the main phase queue IRQ was pending." units="cycles" counter="ITER_FRAG_IRQ_ACTIVE" offset="52" />
|
||||
<event name="MaliMainQueueJob" title="Main phase jobs" description="The number of main phase jobs processed." units="jobs" counter="ITER_FRAG_JOB_COMPLETED" offset="49" />
|
||||
<event name="MaliMainQueueTask" title="Main phase tasks" description="The number of fragment tasks processed." units="tasks" counter="ITER_FRAG_TASK_COMPLETED" offset="50" />
|
||||
<event name="MaliMainQueueUtil" title="Main phase queue utilization" description="The main phase queue utilization compared against the GPU active cycles. The main phase includes deferred vertex shading and all fragment shading." units="percent" equation="(((ITER_FRAG_ACTIVE) - (ITER_FRAG_READY_BLOCKED)) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliMainQueuedCy" title="Main phase work queued cycles" description="The number of cycles that the main phase queue has work queued. The main phase includes deferred vertex shading and all fragment shading." units="cycles" counter="ITER_FRAG_ACTIVE" offset="48" />
|
||||
<event name="MaliTilerQueueDrainStallCy" title="Binning phase queue endpoint drain stall cycles" description="The number of cycles the binning phase queue is waiting for endpoints to drain." units="cycles" counter="ITER_TILER_EP_DRAIN" offset="71" />
|
||||
</category>
|
||||
<category name="Memory System">
|
||||
<event name="MaliExtBusRd" title="Output external read transactions" description="The number of external read transactions." units="transactions" counter="L2_EXT_READ" offset="29" />
|
||||
<event name="MaliExtBusRdBt" title="Output external read beats" description="The number of external bus data read cycles." units="beats" counter="L2_EXT_READ_BEATS" offset="32" />
|
||||
<event name="MaliExtBusRdBy" title="Output external read bytes" description="The total output read bandwidth for the GPU." units="bytes" equation="(L2_EXT_READ_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusRdLat0" title="Output external read latency 0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" counter="L2_EXT_RRESP_0_127" offset="37" />
|
||||
<event name="MaliExtBusRdLat128" title="Output external read latency 128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" counter="L2_EXT_RRESP_128_191" offset="38" />
|
||||
<event name="MaliExtBusRdLat192" title="Output external read latency 192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" counter="L2_EXT_RRESP_192_255" offset="39" />
|
||||
<event name="MaliExtBusRdLat256" title="Output external read latency 256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" counter="L2_EXT_RRESP_256_319" offset="40" />
|
||||
<event name="MaliExtBusRdLat320" title="Output external read latency 320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" counter="L2_EXT_RRESP_320_383" offset="41" />
|
||||
<event name="MaliExtBusRdLat384" title="Output external read latency 384+ cycles" description="The number of read beats that are returned at least 384 cycles after the transaction started." units="beats" equation="(L2_EXT_READ_BEATS) - (L2_EXT_RRESP_0_127) - (L2_EXT_RRESP_128_191) - (L2_EXT_RRESP_192_255) - (L2_EXT_RRESP_256_319) - (L2_EXT_RRESP_320_383)" />
|
||||
<event name="MaliExtBusRdNoSnoop" title="Output external ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" counter="L2_EXT_READ_NOSNP" offset="30" />
|
||||
<event name="MaliExtBusRdOTQ1" title="Output external outstanding reads 0-25%" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q1" offset="34" />
|
||||
<event name="MaliExtBusRdOTQ2" title="Output external outstanding reads 25-50%" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q2" offset="35" />
|
||||
<event name="MaliExtBusRdOTQ3" title="Output external outstanding reads 50-75%" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q3" offset="36" />
|
||||
<event name="MaliExtBusRdOTQ4" title="Output external outstanding reads 75-100%" description="The number of read transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_READ) - (L2_EXT_AR_CNT_Q1) - (L2_EXT_AR_CNT_Q2) - (L2_EXT_AR_CNT_Q3)" />
|
||||
<event name="MaliExtBusRdStallCy" title="Output external read stall cycles" description="The number of cycles when a read is stalled waiting for the external bus." units="cycles" counter="L2_EXT_AR_STALL" offset="33" />
|
||||
<event name="MaliExtBusRdStallRate" title="Output external read stall percentage" description="The percentage of cycles with an external read transaction stalled." units="percent" equation="((L2_EXT_AR_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliExtBusRdUnique" title="Output external ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" counter="L2_EXT_READ_UNIQUE" offset="31" />
|
||||
<event name="MaliExtBusWr" title="Output external write transactions" description="The number of external write transactions." units="transactions" counter="L2_EXT_WRITE" offset="42" />
|
||||
<event name="MaliExtBusWrBt" title="Output external write beats" description="The number of external bus data write cycles." units="beats" counter="L2_EXT_WRITE_BEATS" offset="47" />
|
||||
<event name="MaliExtBusWrBy" title="Output external write bytes" description="The total output write bandwidth for the GPU." units="bytes" equation="(L2_EXT_WRITE_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusWrNoSnoopFull" title="Output external WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_FULL" offset="43" />
|
||||
<event name="MaliExtBusWrNoSnoopPart" title="Output external WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_PTL" offset="44" />
|
||||
<event name="MaliExtBusWrOTQ1" title="Output external outstanding writes 0-25%" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q1" offset="49" />
|
||||
<event name="MaliExtBusWrOTQ2" title="Output external outstanding writes 25-50%" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q2" offset="50" />
|
||||
<event name="MaliExtBusWrOTQ3" title="Output external outstanding writes 50-75%" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q3" offset="51" />
|
||||
<event name="MaliExtBusWrOTQ4" title="Output external outstanding writes 75-100%" description="The number of write transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_WRITE) - (L2_EXT_AW_CNT_Q1) - (L2_EXT_AW_CNT_Q2) - (L2_EXT_AW_CNT_Q3)" />
|
||||
<event name="MaliExtBusWrSnoopFull" title="Output external WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_FULL" offset="45" />
|
||||
<event name="MaliExtBusWrSnoopPart" title="Output external WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_PTL" offset="46" />
|
||||
<event name="MaliExtBusWrStallCy" title="Output external write stall cycles" description="The number of cycles when a write is stalled waiting for the external bus." units="cycles" counter="L2_EXT_W_STALL" offset="48" />
|
||||
<event name="MaliExtBusWrStallRate" title="Output external write stall percentage" description="The percentage of cycles with an external write transaction stalled." units="percent" equation="((L2_EXT_W_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliL2CacheCleanUnique" title="Input internal clean unique requests" description="The number of L2 cache line clean unique requests from internal requesters." units="requests" counter="L2_RD_MSG_IN_CU" offset="13" />
|
||||
<event name="MaliL2CacheEvict" title="Input internal evict requests" description="The number of L2 cache line evict requests from internal requesters." units="requests" counter="L2_RD_MSG_IN_EVICT" offset="12" />
|
||||
<event name="MaliL2CacheFlush" title="L2 cache flush requests" description="The number of GPU L2 cache flushes performed." units="requests" counter="CACHE_FLUSH" offset="13" />
|
||||
<event name="MaliL2CacheFlushCy" title="L2 cache flush cycles" description="The number of cycles spent flushing GPU L2 caches." units="cycles" counter="CACHE_FLUSH_CYCLES" offset="12" />
|
||||
<event name="MaliL2CacheL1Rd" title="Output internal read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal requester." units="requests" counter="L2_RD_MSG_OUT" offset="22" />
|
||||
<event name="MaliL2CacheL1RdStallCy" title="Output internal read stall cycles" description="The number of cycles when L1 cache read requests sent by the L2 cache to an internal requester are stalled." units="cycles" counter="L2_RD_MSG_OUT_STALL" offset="23" />
|
||||
<event name="MaliL2CacheL1Wr" title="Output internal write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal requester." units="requests" counter="L2_WR_MSG_OUT" offset="24" />
|
||||
<event name="MaliL2CacheLookup" title="Any lookup requests" description="The number of L2 cache lookups performed." units="requests" counter="L2_ANY_LOOKUP" offset="25" />
|
||||
<event name="MaliL2CacheRd" title="Input internal read requests" description="The number of L2 cache read requests from internal requesters." units="requests" counter="L2_RD_MSG_IN" offset="16" />
|
||||
<event name="MaliL2CacheRdLookup" title="Read lookup requests" description="The number of L2 cache read lookups performed." units="requests" counter="L2_READ_LOOKUP" offset="26" />
|
||||
<event name="MaliL2CacheRdMissRate" title="L2 cache read miss percentage" description="The percentage of internal L2 cache reads that result in an external read." units="percent" equation="((L2_EXT_READ) / (L2_READ_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheRdStallCy" title="Input internal read stall cycles" description="The number of cycles L2 cache read requests from internal requesters are stalled." units="cycles" counter="L2_RD_MSG_IN_STALL" offset="17" />
|
||||
<event name="MaliL2CacheSnp" title="Input internal snoop requests" description="The number of L2 snoop requests from internal requesters." units="requests" counter="L2_SNP_MSG_IN" offset="20" />
|
||||
<event name="MaliL2CacheSnpStallCy" title="Input internal snoop stall cycles" description="The number of cycles when L2 cache snoop requests from internal requesters are stalled." units="cycles" counter="L2_SNP_MSG_IN_STALL" offset="21" />
|
||||
<event name="MaliL2CacheWr" title="Input internal write requests" description="The number of L2 cache write requests from internal requesters." units="requests" counter="L2_WR_MSG_IN" offset="18" />
|
||||
<event name="MaliL2CacheWrLookup" title="Write lookup requests" description="The number of L2 cache write lookups performed." units="requests" counter="L2_WRITE_LOOKUP" offset="27" />
|
||||
<event name="MaliL2CacheWrMissRate" title="L2 cache write miss percentage" description="The percentage of internal L2 cache writes that result in an external write." units="percent" equation="((L2_EXT_WRITE) / (L2_WRITE_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheWrStallCy" title="Input internal write stall cycles" description="The number of cycles when L2 cache write requests from internal requesters are stalled." units="cycles" counter="L2_WR_MSG_IN_STALL" offset="19" />
|
||||
<event name="MaliMMUL2Hit" title="MMU L2 lookup TLB hits" description="The number of level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L2" offset="8" />
|
||||
<event name="MaliMMUL2Rd" title="MMU L2 table read requests" description="The number of level 2 translation table reads." units="requests" counter="MMU_TABLE_READS_L2" offset="6" />
|
||||
<event name="MaliMMUL3Hit" title="MMU L3 lookup TLB hits" description="The number of level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L3" offset="7" />
|
||||
<event name="MaliMMUL3Rd" title="MMU L3 table read requests" description="The number of level 3 translation table reads." units="requests" counter="MMU_TABLE_READS_L3" offset="5" />
|
||||
<event name="MaliMMULookup" title="MMU lookup requests" description="The number of main MMU address translations performed." units="requests" counter="MMU_REQUESTS" offset="4" />
|
||||
</category>
|
||||
<category name="Shader Core">
|
||||
<event name="MaliALUIssueCy" title="Arithmetic unit issue cycles" description="The number of cycles the arithmetic unit was busy." units="cycles" equation="max((((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU)) - (EXEC_INSTR_SLOT_1)), (EXEC_INSTR_SLOT_1), (EXEC_INSTR_SFU) * 4)" />
|
||||
<event name="MaliALUUtil" title="Arithmetic unit utilization" description="The percentage utilization of the arithmetic unit." units="percent" equation="((max((((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU)) - (EXEC_INSTR_SLOT_1)), (EXEC_INSTR_SLOT_1), (EXEC_INSTR_SFU) * 4)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliAnyActiveCy" title="Any workload active cycles" description="The number of cycles when the shader core is processing either a non-fragment workload or a fragment workload." units="cycles" counter="SHADER_CORE_ACTIVE" offset="53" />
|
||||
<event name="MaliAnyUtil" title="Shader core clock ratio" description="An estimate of shader core use relative to the GPU top-level clock." units="percent" equation="((SHADER_CORE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliAttrInstr" title="Attribute instructions" description="The number of instructions run by the attribute unit." units="instructions" counter="ATTR_INSTR" offset="52" />
|
||||
<event name="MaliAttrIssueCy" title="Attribute unit issue cycles" description="The total number of attribute unit issue cycles." units="cycles" counter="ATTR_ACTIVE" offset="108" />
|
||||
<event name="MaliAttrUtil" title="Attribute unit utilization" description="The percentage utilization of the attribute unit." units="percent" equation="((ATTR_ACTIVE) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliBlendIssueCy" title="Blend unit issue cycles" description="The total number of blend unit issue cycles." units="cycles" counter="BLEND_ACTIVE" offset="107" />
|
||||
<event name="MaliBlendUtil" title="Blend unit utilization" description="The percentage utilization of the blend unit." units="percent" equation="((BLEND_ACTIVE) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliCompOrBinningActiveCy" title="Compute or binning phase active cycles" description="The number of cycles when the shader core is processing some compute or binning phase workload." units="cycles" counter="COMPUTE_ACTIVE" offset="22" />
|
||||
<event name="MaliCompOrBinningUtil" title="Compute or binning phase utilization" description="The utilization of the shader core compute or binning phase path." units="percent" equation="((COMPUTE_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliCoreActiveCy" title="Execution core active cycles" description="The number of cycles when the shader core is processing at least one warp." units="cycles" counter="EXEC_CORE_ACTIVE" offset="26" />
|
||||
<event name="MaliCoreAllRegsWarp" title="Warps using more than 32 registers" description="The number of warps that require more than 32 registers." units="warps" counter="WARP_REG_SIZE_64" offset="17" />
|
||||
<event name="MaliCoreAllRegsWarpRate" title="All registers warp percentage" description="The percentage of warps that require more than 32 registers." units="percent" equation="((WARP_REG_SIZE_64) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreFragWarpOcc" title="Fragment warp occupancy" description="The thread occupancy of the fragment warps." units="percent" equation="((FRAG_SHADER_THREADS) / ((FRAG_WARPS) * (16))) * 100" />
|
||||
<event name="MaliCoreFullWarp" title="Full warps" description="The number of warps that have a full thread slot allocation." units="warps" counter="FULL_QUAD_WARPS" offset="21" />
|
||||
<event name="MaliCoreFullWarpRate" title="Full warp percentage" description="The percentage of warps that have a full thread slot allocation." units="percent" equation="((FULL_QUAD_WARPS) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreUtil" title="Execution core utilization" description="The utilization of the programmable shader core." units="percent" equation="((EXEC_CORE_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliDefVertWarp" title="Deferred vertex warps" description="The number of deferred vertex warps created." units="warps" counter="DVS_WARPS" offset="106" />
|
||||
<event name="MaliEngArithInstr" title="Arithmetic instruction issue cycles" description="The total number of instructions issued to the FMA, CVT, and SFU pipes." units="instructions" equation="(EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU)" />
|
||||
<event name="MaliEngAttrBackpressureCy" title="Attribute unit backpressure cycles" description="The number of cycles new work could not be sent to the attribute unit." units="cycles" counter="EXEC_MSG_STALLED_ATTR" offset="117" />
|
||||
<event name="MaliEngAttrBackpressureRate" title="Attribute unit backpressure percentage" description="The percentage of cycles new work could not be sent to the attribute unit." units="percent" equation="((EXEC_MSG_STALLED_ATTR) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngBlendBackpressureCy" title="Blend unit backpressure cycles" description="The number of cycles new work could not be sent to the blend unit." units="cycles" counter="EXEC_MSG_STALLED_BLEND" offset="114" />
|
||||
<event name="MaliEngBlendBackpressureRate" title="Blend unit backpressure percentage" description="The percentage of cycles new work could not be sent to the blend unit." units="percent" equation="((EXEC_MSG_STALLED_BLEND) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngCVTInstr" title="Arithmetic CVT pipe instructions" description="The number of instructions issued to the CVT pipe." units="instructions" counter="EXEC_INSTR_CVT" offset="28" />
|
||||
<event name="MaliEngCVTPipeUtil" title="CVT pipe utilization" description="Defines the utilization of the CVT pipes." units="percent" equation="((EXEC_INSTR_CVT) / (2 * (EXEC_CORE_ACTIVE))) * 100" />
|
||||
<event name="MaliEngDivergedInstr" title="Diverged instructions" description="The number of instructions run per warp that have control flow divergence." units="instructions" counter="EXEC_INSTR_DIVERGED" offset="31" />
|
||||
<event name="MaliEngDivergedInstrRate" title="Warp divergence percentage" description="The percentage of instructions that have control flow divergence across the warp." units="percent" equation="((EXEC_INSTR_DIVERGED) / ((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU))) * 100" />
|
||||
<event name="MaliEngFMAInstr" title="Arithmetic FMA pipe instructions" description="The number of instructions issued to the FMA pipe." units="instructions" counter="EXEC_INSTR_FMA" offset="27" />
|
||||
<event name="MaliEngFMAPipeUtil" title="FMA pipe utilization" description="The utilization of the FMA pipes." units="percent" equation="((EXEC_INSTR_FMA) / (2 * (EXEC_CORE_ACTIVE))) * 100" />
|
||||
<event name="MaliEngLSBackpressureCy" title="Load/store unit backpressure cycles" description="The number of cycles new work could not be sent to the load/store unit." units="cycles" counter="EXEC_MSG_STALLED_LSC" offset="116" />
|
||||
<event name="MaliEngLSBackpressureRate" title="Load/store unit backpressure percentage" description="The percentage of cycles new work could not be sent to the load/store unit." units="percent" equation="((EXEC_MSG_STALLED_LSC) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngNarrowInstr" title="Narrow arithmetic instructions" description="The number of narrow arithmetic instructions." units="instructions" counter="EXEC_INSTR_NARROW" offset="5" />
|
||||
<event name="MaliEngNarrowInstrRate" title="Narrow arithmetic percentage" description="The percentage of arithmetic instructions that operate on 8/16-bit types." units="percent" equation="((EXEC_INSTR_NARROW) / ((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU))) * 100" />
|
||||
<event name="MaliEngRTUBackpressureCy" title="Ray tracing unit backpressure cycles" description="The number of cycles new work could not be sent to the ray tracing unit." units="cycles" counter="EXEC_MSG_STALLED_RTU" offset="109" />
|
||||
<event name="MaliEngRTUBackpressureRate" title="Ray tracing unit backpressure percentage" description="The percentage of cycles new work could not be sent to the ray tracing unit." units="percent" equation="((EXEC_MSG_STALLED_RTU) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngSFUInstr" title="Arithmetic SFU pipe instructions" description="The number of instructions issued to the SFU pipe." units="instructions" counter="EXEC_INSTR_SFU" offset="29" />
|
||||
<event name="MaliEngSFUPipeUtil" title="SFU pipe utilization" description="The utilization of the SFU pipe." units="percent" equation="(((EXEC_INSTR_SFU) * 4) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngSWBlendInstr" title="Blend shader instructions" description="The number of blend shader invocations run." units="instructions" counter="CALL_BLEND_SHADER" offset="34" />
|
||||
<event name="MaliEngSWBlendRate" title="Shader blend percentage" description="The percentage of fragments that use shader-based blending." units="percent" equation="(((CALL_BLEND_SHADER) * 4) / (FRAG_WARPS)) * 100" />
|
||||
<event name="MaliEngSlot0IssueCy" title="Slot 0 arithmetic issue cycles" description="The number of arithmetic issue cycles to slot 0." units="cycles" equation="((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU)) - (EXEC_INSTR_SLOT_1)" />
|
||||
<event name="MaliEngSlot1IssueCy" title="Slot 1 arithmetic issue cycles" description="The number of arithmetic issue cycles to slot 1." units="cycles" counter="EXEC_INSTR_SLOT_1" offset="118" />
|
||||
<event name="MaliEngSlotAnyIssueCy" title="Any slot arithmetic issue cycles" description="The number of arithmetic issue cycles that issue to either issue slot." units="cycles" counter="EXEC_ISSUE_SLOT_ANY" offset="119" />
|
||||
<event name="MaliEngStarveCy" title="Processing unit starvation cycles" description="The number of cycles when the processing unit is starved of work." units="cycles" counter="EXEC_STARVE_ARITH" offset="33" />
|
||||
<event name="MaliEngStarveICacheCy" title="Processing unit I-cache starvation cycles" description="The number of cycles when the processing unit is starved of work due to I-cache misses." units="cycles" counter="EXEC_STARVE_ICACHE" offset="32" />
|
||||
<event name="MaliEngTexBackpressureCy" title="Texture unit backpressure cycles" description="The number of cycles new work could not be sent to the texture unit." units="cycles" counter="EXEC_MSG_STALLED_TEX" offset="112" />
|
||||
<event name="MaliEngTexBackpressureRate" title="Texture unit backpressure percentage" description="The percentage of cycles new work could not be sent to the texture unit." units="percent" equation="((EXEC_MSG_STALLED_TEX) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngVarBackpressureCy" title="Varying unit backpressure cycles" description="The number of cycles new work could not be sent to the varying unit." units="cycles" counter="EXEC_MSG_STALLED_VARY" offset="113" />
|
||||
<event name="MaliEngVarBackpressureRate" title="Varying unit backpressure percentage" description="The percentage of cycles new work could not be sent to the varying unit." units="percent" equation="((EXEC_MSG_STALLED_VARY) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngZSBackpressureCy" title="ZS unit backpressure cycles" description="The number of cycles new work could not be sent to the depth/stencil test unit." units="cycles" counter="EXEC_MSG_STALLED_ZS" offset="115" />
|
||||
<event name="MaliEngZSBackpressureRate" title="ZS unit backpressure percentage" description="The percentage of cycles new work could not be sent to the depth/stencil test unit." units="percent" equation="((EXEC_MSG_STALLED_ZS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliFragEZSKillQd" title="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_KILL" offset="14" />
|
||||
<event name="MaliFragEZSTestQd" title="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_TEST" offset="12" />
|
||||
<event name="MaliFragEZSUpdateQd" title="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" counter="FRAG_QUADS_EZS_UPDATE" offset="13" />
|
||||
<event name="MaliFragFPKActiveCy" title="Fragment pre-pipe buffer active cycles" description="The number of cycles when at least one quad is present in the pre-pipe quad queue." units="cycles" counter="FRAG_FPK_ACTIVE" offset="7" />
|
||||
<event name="MaliFragFPKBUtil" title="Fragment pre-pipe buffer utilization" description="The percentage of cycles when at least one quad is buffered for fragment shading." units="percent" equation="((FRAG_FPK_ACTIVE) / (FRAG_ACTIVE)) * 100" />
|
||||
<event name="MaliFragInputPrim" title="Input fragment primitives" description="The number of unique primitives loaded by the fragment front-end." units="primitives" equation="((FRAG_PRIMITIVES_OUT) + (FRAG_PRIMITIVES_HSR_CULLED)) - (FRAG_PRIMITIVES_OUT_PRE_PASS)" />
|
||||
<event name="MaliFragLZSKillQd" title="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" counter="FRAG_LZS_KILL" offset="16" />
|
||||
<event name="MaliFragLZSKillRate" title="Late ZS killed quad percentage" description="The percentage of rasterized quads that are killed by late depth and stencil testing." units="percent" equation="((FRAG_LZS_KILL) / (4 * (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliFragLZSTestQd" title="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" counter="FRAG_LZS_TEST" offset="15" />
|
||||
<event name="MaliFragLZSTestRate" title="Late ZS tested quad percentage" description="The percentage of rasterized quads that are tested by late depth and stencil testing." units="percent" equation="((FRAG_LZS_TEST) / (4 * (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliFragMainPassStallCy" title="Fragment main pass stall cycles" description="The number of cycles when the fragment main pass is stalled waiting for prepass results." units="cycles" counter="FRAG_MAIN_PASS_STALLED_BY_PRE_PASS" offset="105" />
|
||||
<event name="MaliFragMainPassStallRate" title="Fragment main pass stall percentage" description="The percentage of cycles when the fragment main pass is stalled by the fragment prepass." units="percent" equation="((FRAG_MAIN_PASS_STALLED_BY_PRE_PASS) / (FRAG_ACTIVE)) * 100" />
|
||||
<event name="MaliFragMainThread" title="Fragment main pass threads" description="The number of fragment threads started in the main pass." units="threads" equation="((FRAG_WARPS) - (FRAG_WARPS_PRE_PASS)) * (16)" />
|
||||
<event name="MaliFragOpaqueQd" title="Occluding quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" counter="QUAD_FPK_KILLER" offset="20" />
|
||||
<event name="MaliFragOverdraw" title="Fragments per pixel" description="The number of fragments shaded per output pixel." units="threads" equation="(FRAG_SHADER_THREADS) / ((ITER_FRAG_TASK_COMPLETED) * (64) * (64))" />
|
||||
<event name="MaliFragPrepassCullPrim" title="Fragment prepass culled primitives" description="The number of primitives culled by the fragment prepass." units="primitives" counter="FRAG_PRIMITIVES_HSR_CULLED" offset="98" />
|
||||
<event name="MaliFragPrepassCullPrimRate" title="Fragment prepass primitive culling percentage" description="The percentage of primitives culled by the fragment prepass." units="percent" equation="((FRAG_PRIMITIVES_HSR_CULLED) / (((FRAG_PRIMITIVES_OUT) + (FRAG_PRIMITIVES_HSR_CULLED)) - (FRAG_PRIMITIVES_OUT_PRE_PASS))) * 100" />
|
||||
<event name="MaliFragPrepassEZSUpdateQd" title="Fragment prepass early ZS updated quads" description="The number of quads that update the fragment prepass using early ZS." units="quads" counter="FRAG_QUADS_HSR_BUF_EZS_UPDATE" offset="101" />
|
||||
<event name="MaliFragPrepassKillQd" title="Fragment prepass killed quads" description="The number of quads that are killed by the fragment prepass." units="quads" counter="FRAG_QUADS_HSR_BUF_KILLED" offset="103" />
|
||||
<event name="MaliFragPrepassKillRate" title="Fragment prepass killed quad percentage" description="The percentage of tested quads that are killed by the fragment prepass." units="percent" equation="((FRAG_QUADS_HSR_BUF_KILLED) / (FRAG_QUADS_HSR_BUF_TEST)) * 100" />
|
||||
<event name="MaliFragPrepassPrim" title="Loaded fragment prepass primitives" description="The number of primitives loaded by the fragment front-end for the fragment prepass." units="primitives" counter="FRAG_PRIMITIVES_OUT_PRE_PASS" offset="99" />
|
||||
<event name="MaliFragPrepassPrimRate" title="Fragment prepass primitive percentage" description="The percentage of primitives processed by fragment prepass hidden surface removal." units="percent" equation="((FRAG_PRIMITIVES_OUT_PRE_PASS) / (((FRAG_PRIMITIVES_OUT) + (FRAG_PRIMITIVES_HSR_CULLED)) - (FRAG_PRIMITIVES_OUT_PRE_PASS))) * 100" />
|
||||
<event name="MaliFragPrepassSkipPrimRate" title="Fragment prepass skipped primitive percentage" description="The percentage of primitives that are skipped by the fragment prepass." units="percent" equation="((FRAG_PRIMITIVES_HSR_DISABLED) / (((FRAG_PRIMITIVES_OUT) + (FRAG_PRIMITIVES_HSR_CULLED)) - (FRAG_PRIMITIVES_OUT_PRE_PASS))) * 100" />
|
||||
<event name="MaliFragPrepassSkippedPrim" title="Fragment prepass skipped primitives" description="The number of primitives that are skipped by the fragment prepass." units="primitives" counter="FRAG_PRIMITIVES_HSR_DISABLED" offset="100" />
|
||||
<event name="MaliFragPrepassTestQd" title="Fragment prepass tested quads" description="The number of quads that are tested by the fragment prepass." units="quads" counter="FRAG_QUADS_HSR_BUF_TEST" offset="102" />
|
||||
<event name="MaliFragPrepassThread" title="Fragment prepass threads" description="The number of fragment threads started in the prepass." units="threads" equation="(FRAG_WARPS_PRE_PASS) * (16)" />
|
||||
<event name="MaliFragPrepassWarp" title="Fragment prepass warps" description="The number of fragment prepass warps created." units="warps" counter="FRAG_WARPS_PRE_PASS" offset="104" />
|
||||
<event name="MaliFragPrepassWarpRate" title="Fragment prepass warp percentage" description="The percentage of warps being processed by the fragment prepass." units="percent" equation="((FRAG_WARPS_PRE_PASS) / ((FRAG_WARPS) - (FRAG_WARPS_PRE_PASS))) * 100" />
|
||||
<event name="MaliFragPrim" title="Loaded fragment primitives" description="The number of primitives loaded by the fragment front-end." units="primitives" counter="FRAG_PRIMITIVES_OUT" offset="97" />
|
||||
<event name="MaliFragRastCoarseQd" title="Rasterized coarse quads" description="The number of coarse quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_COARSE" offset="68" />
|
||||
<event name="MaliFragRastPartQd" title="Partial rasterized fine quads" description="The number of rasterized fine quads created with partial coverage." units="quads" counter="FRAG_PARTIAL_QUADS_RAST" offset="10" />
|
||||
<event name="MaliFragRastPartQdRate" title="Partial coverage percentage" description="The percentage of rasterized fine quads that contain samples with no coverage." units="percent" equation="((FRAG_PARTIAL_QUADS_RAST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragRastPrim" title="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" counter="FRAG_PRIM_RAST" offset="6" />
|
||||
<event name="MaliFragRastQd" title="Rasterized fine quads" description="The number of fine quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_RAST" offset="11" />
|
||||
<event name="MaliFragShadRate" title="Fragment shading rate" description="The percentage of coarse quads generated relative to fine quads rasterized." units="percent" equation="((FRAG_QUADS_COARSE) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragShadedQd" title="Shaded coarse quads" description="The number of 2x2 fragment quads that are fragment shaded." units="quads" equation="((FRAG_WARPS) * (16)) / 4" />
|
||||
<event name="MaliFragThread" title="All fragment threads" description="The number of fragment threads started in the prepass and main pass." units="threads" counter="FRAG_SHADER_THREADS" offset="69" />
|
||||
<event name="MaliFragThroughputCy" title="Average cycles per fragment thread" description="The average number of shader core cycles per fragment thread." units="cycles" equation="(FRAG_ACTIVE) / (((FRAG_WARPS) - (FRAG_WARPS_PRE_PASS)) * (16))" />
|
||||
<event name="MaliFragTile" title="Tiles" description="The number of tiles processed by the shader core." units="tiles" counter="FRAG_PTILES" offset="18" />
|
||||
<event name="MaliFragTileKill" title="Killed unchanged tiles" description="The number of tiles killed by transaction elimination." units="tiles" counter="FRAG_TRANS_ELIM" offset="19" />
|
||||
<event name="MaliFragTileKillRate" title="Unchanged tile kill percentage" description="The percentage of tiles that are killed by transaction elimination." units="percent" equation="((FRAG_TRANS_ELIM) / (4 * (FRAG_PTILES))) * 100" />
|
||||
<event name="MaliFragWarp" title="Fragment warps" description="The number of fragment warps created." units="warps" counter="FRAG_WARPS" offset="9" />
|
||||
<event name="MaliLSAtomic" title="Load/store unit atomic issues" description="The number of load/store atomic accesses." units="cycles" counter="LS_MEM_ATOMIC" offset="48" />
|
||||
<event name="MaliLSFullRd" title="Load/store unit full read issues" description="The number of full-width load/store cache reads." units="cycles" counter="LS_MEM_READ_FULL" offset="44" />
|
||||
<event name="MaliLSFullWr" title="Load/store unit full write issues" description="The number of full-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_FULL" offset="46" />
|
||||
<event name="MaliLSIssueCy" title="Load/store unit issue cycles" description="The total number of load/store issue cycles." units="cycles" equation="((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)" />
|
||||
<event name="MaliLSPartRd" title="Load/store unit partial read issues" description="The number of partial-width load/store cache reads." units="cycles" counter="LS_MEM_READ_SHORT" offset="45" />
|
||||
<event name="MaliLSPartWr" title="Load/store unit partial write issues" description="The number of partial-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_SHORT" offset="47" />
|
||||
<event name="MaliLSRdCy" title="Load/store unit read issues" description="The total number of load/store read cycles." units="cycles" equation="(LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)" />
|
||||
<event name="MaliLSUtil" title="Load/store unit utilization" description="The percentage utilization of the load/store unit." units="percent" equation="((((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliLSWrCy" title="Load/store unit write issues" description="The total number of load/store write cycles." units="cycles" equation="(LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)" />
|
||||
<event name="MaliMainActiveCy" title="Main phase active cycles" description="The number of cycles when the shader core is processing a main phase workload." units="cycles" counter="FRAG_ACTIVE" offset="4" />
|
||||
<event name="MaliMainUtil" title="Main phase utilization" description="The utilization of the shader core main phase path." units="percent" equation="((FRAG_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragTask" title="Non-main phase core tasks" description="The number of non-main phase tasks issued to the shader core." units="tasks" counter="COMPUTE_TASKS" offset="23" />
|
||||
<event name="MaliNonFragThread" title="Non-fragment threads" description="The number of non-fragment threads started." units="threads" equation="(COMPUTE_WARPS) * (16)" />
|
||||
<event name="MaliNonFragThroughputCy" title="Average cycles per non-fragment thread" description="The average number of shader core cycles per non-fragment thread." units="cycles" equation="(COMPUTE_ACTIVE) / ((COMPUTE_WARPS) * (16))" />
|
||||
<event name="MaliNonFragWarp" title="Non-fragment warps" description="The number of non-fragment warps created." units="warps" counter="COMPUTE_WARPS" offset="24" />
|
||||
<event name="MaliRTUActiveCy" title="Ray tracing unit active cycles" description="The total number of ray tracing unit active cycles." units="cycles" counter="RT_ACTIVE" offset="122" />
|
||||
<event name="MaliRTUBLASCull" title="Ray tracing culled BLAS instances" description="The number of BLAS instances culled by the ray tracing unit." units="issues" counter="RT_RAY_INSTANCE_CULL" offset="127" />
|
||||
<event name="MaliRTUBLASIssue" title="Ray tracing BLAS instances" description="The number of BLAS instances processed by the ray tracing unit." units="issues" counter="RT_RAY_INSTANCE" offset="125" />
|
||||
<event name="MaliRTUBoxIssue" title="Ray tracing box tests" description="The number of boxes issued to the ray tracing unit box test data path." units="issues" counter="RT_BOX_ISSUE_COUNT" offset="85" />
|
||||
<event name="MaliRTUBoxIssueCy" title="Ray tracing box tester issue cycles" description="The number of active issue cycles for the ray tracing box test unit." units="cycles" counter="RT_RAY_BOX_ISSUED" offset="78" />
|
||||
<event name="MaliRTUCacheHit" title="Ray tracing unit cache hit" description="The number of hits in the ray tracing unit cache." units="requests" counter="RT_CACHE_READ_HIT" offset="110" />
|
||||
<event name="MaliRTUCacheMiss" title="Ray tracing unit cache miss" description="The number of misses in the ray tracing unit cache." units="requests" counter="RT_CACHE_READ_MISS" offset="111" />
|
||||
<event name="MaliRTUFirstHitTerm" title="Ray tracing first hit terminations" description="The number of rays that terminate on their first hit." units="rays" counter="RT_TERM_FIRST_HIT" offset="82" />
|
||||
<event name="MaliRTUIssueCy" title="Ray tracing unit issue cycles" description="The number of cycles the ray tracing unit was issuing work." units="cycles" equation="max((RT_RAY_BOX_ISSUED), (RT_RAY_TRI_ISSUED))" />
|
||||
<event name="MaliRTUMiss" title="Ray tracing triangle test misses" description="The number of triangle intersection tests that do not intersect." units="rays" counter="RT_MISS" offset="83" />
|
||||
<event name="MaliRTUNewTraceInstr" title="Ray tracing new trace messages" description="The number of ray tracing messages starting new rays." units="requests" counter="RT_TRACE_MSG_NEW" offset="120" />
|
||||
<event name="MaliRTUNonOpaqueHit" title="Ray tracing non-opaque triangle hits" description="The number of non-opaque triangle hits." units="tests" counter="RT_NON_OPAQUE_HIT" offset="81" />
|
||||
<event name="MaliRTUOpaqueHit" title="Ray tracing opaque triangle hits" description="The number of opaque triangle hits." units="tests" counter="RT_OPAQUE_HIT" offset="80" />
|
||||
<event name="MaliRTUPrimCull" title="Ray tracing culled primitives" description="The number of primitives culled by the ray tracing unit testers." units="issues" counter="RT_RAY_TRI_CULL" offset="126" />
|
||||
<event name="MaliRTURay" title="Ray tracing started rays" description="The number of rays started." units="rays" counter="RT_RAYS_STARTED" offset="84" />
|
||||
<event name="MaliRTUResumeTraceInstr" title="Ray tracing resume trace messages" description="The number of ray tracing messages resuming existing rays." units="requests" counter="RT_TRACE_MSG_RESUME" offset="121" />
|
||||
<event name="MaliRTUResumeTraceRays" title="Ray tracing resumed rays" description="The number of rays that are resumed after an initial hit or intersection shader." units="rays" counter="RT_TRACE_RESUME" offset="123" />
|
||||
<event name="MaliRTUStackOverflows" title="Ray tracing deep traversals" description="The number of rays that are using a deep traversal stack." units="rays" counter="RT_TRAVERSAL_STACK_OVERFLOW" offset="77" />
|
||||
<event name="MaliRTUTLASBoxIssue" title="Ray tracing TLAS box tests" description="The number of boxes from the TLAS issued to the ray tracing unit box test data path." units="issues" counter="RT_RAY_BOX_TLAS" offset="124" />
|
||||
<event name="MaliRTUTriCull" title="Ray tracing triangle primitive tests" description="The number of triangles issued to the ray tracing unit triangle test data path." units="issues" counter="RT_TRI_ISSUE_COUNT" offset="86" />
|
||||
<event name="MaliRTUTriIssueCy" title="Ray tracing triangle tester issue cycles" description="The number of active issue cycles for the ray tracing triangle test unit." units="cycles" counter="RT_RAY_TRI_ISSUED" offset="79" />
|
||||
<event name="MaliRTUUtil" title="Ray tracing unit utilization" description="The percentage utilization of the ray tracing unit." units="percent" equation="((max((RT_RAY_BOX_ISSUED), (RT_RAY_TRI_ISSUED))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliSCBusFFEExtRdBt" title="Fragment front-end read beats from external memory" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_FTC_EXT" offset="55" />
|
||||
<event name="MaliSCBusFFEExtRdBy" title="Fragment front-end read bytes from external memory" description="The total number of bytes read from the external memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC_EXT) * (16)" />
|
||||
<event name="MaliSCBusFFEL2RdBt" title="Fragment front-end read beats from L2 cache" description="The number of read beats received by the fixed-function fragment front-end." units="beats" counter="BEATS_RD_FTC" offset="54" />
|
||||
<event name="MaliSCBusFFEL2RdBy" title="Fragment front-end read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdBt" title="Load/store unit read beats from external memory" description="The number of read beats received by the load/store unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_LSC_EXT" offset="57" />
|
||||
<event name="MaliSCBusLSExtRdBy" title="Load/store unit read bytes from external memory" description="The total number of bytes read from the external memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC_EXT) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdByPerRd" title="Load/store unit bytes read from external memory per access cycle" description="The average number of bytes read from the external memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC_EXT) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSL2RdBt" title="Load/store unit read beats from L2 cache" description="The number of read beats received by the load/store unit." units="beats" counter="BEATS_RD_LSC" offset="56" />
|
||||
<event name="MaliSCBusLSL2RdBy" title="Load/store unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSL2RdByPerRd" title="Load/store unit bytes read from L2 per access cycle" description="The average number of bytes read from the L2 memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSWrBt" title="Load/store unit write beats to L2 memory system" description="The number of write beats sent by the load/store unit." units="beats" counter="BEATS_WR_LSC" offset="61" />
|
||||
<event name="MaliSCBusLSWrBy" title="Load/store unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_WR_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSWrByPerWr" title="Load/store unit bytes written to L2 per access cycle" description="The average number of bytes written to the L2 memory system by the load/store unit per write cycle." units="bytes" equation="((BEATS_WR_LSC) * (16)) / ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT))" />
|
||||
<event name="MaliSCBusOtherL2RdBt" title="Miscellaneous read beats from L2 cache" description="The number of read beats received by a unit that is not specifically identified." units="beats" counter="BEATS_RD_OTHER" offset="60" />
|
||||
<event name="MaliSCBusOtherWrBt" title="Miscellaneous write beats to L2 memory system" description="The number of write beats sent by any unit that is not specifically identified." units="beats" counter="BEATS_WR_OTHER" offset="63" />
|
||||
<event name="MaliSCBusOtherWrBy" title="Other unit write bytes to L2 memory system" description="The number of write bytes sent by any unit that is not specifically identified." units="bytes" equation="(BEATS_WR_OTHER) * (16)" />
|
||||
<event name="MaliSCBusRTUExtRdBt" title="Ray tracing unit read beats from external memory" description="The number of read beats received by the ray tracing unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_RTU_EXT" offset="41" />
|
||||
<event name="MaliSCBusRTUExtRdBy" title="Ray tracing unit read bytes from external memory" description="The total number of bytes read from the external memory system by the ray tracing unit." units="bytes" equation="(BEATS_RD_RTU_EXT) * (16)" />
|
||||
<event name="MaliSCBusRTUL2RdBt" title="Ray tracing unit read beats from L2 cache" description="The number of read beats received by the ray tracing unit." units="beats" counter="BEATS_RD_RTU" offset="40" />
|
||||
<event name="MaliSCBusRTUL2RdBy" title="Ray tracing unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the ray tracing unit." units="bytes" equation="(BEATS_RD_RTU) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdBt" title="Texture unit read beats from external memory" description="The number of read beats received by the texture unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_TEX_EXT" offset="59" />
|
||||
<event name="MaliSCBusTexExtRdBy" title="Texture unit read bytes from external memory" description="The total number of bytes read from the external memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX_EXT) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdByPerRd" title="Texture unit bytes read from external memory per texture cycle" description="The average number of bytes read from the external memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX_EXT) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTexL2RdBt" title="Texture unit read beats from L2 cache" description="The number of read beats received by the texture unit." units="beats" counter="BEATS_RD_TEX" offset="58" />
|
||||
<event name="MaliSCBusTexL2RdBy" title="Texture unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX) * (16)" />
|
||||
<event name="MaliSCBusTexL2RdByPerRd" title="Texture unit bytes read from L2 per texture cycle" description="The average number of bytes read from the L2 memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTileWrBPerPx" title="Tile unit bytes written to L2 per pixel" description="The average number of bytes written to the L2 memory system by the tile unit per output pixel." units="bytes" equation="((BEATS_WR_TIB) * (16)) / ((ITER_FRAG_TASK_COMPLETED) * (64) * (64))" />
|
||||
<event name="MaliSCBusTileWrBt" title="Tile unit write beats to L2 memory system" description="The number of write beats sent by the tile write-back unit." units="beats" counter="BEATS_WR_TIB" offset="62" />
|
||||
<event name="MaliSCBusTileWrBy" title="Tile unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the tile write-back unit." units="bytes" equation="(BEATS_WR_TIB) * (16)" />
|
||||
<event name="MaliTexCPI" title="Texture filtering cycles per instruction" description="The average number of texture filtering cycles per instruction." units="cycles" equation="(max((TEX_FILT_NUM_OPERATIONS), (TEX_TFCH_NUM_TCL_OPERATIONS), (TEX_CFCH_NUM_DIRECT_PATH_OPERATIONS), (TEX_CFCH_NUM_RP_OPERATIONS), (TEX_MSGI_NUM_FLITS), (TEX_MSGO_NUM_FLITS), (TEX_CFCH_NUM_L1_CL_OPERATIONS), (TEX_CFCH_NUM_L1_CT_OPERATIONS), (TEX_TIDX_NUM_OPERATIONS))) / ((((TEX_MSGO_NUM_MSG) * 2) - (TEX_MSGO_NUM_SINGLE_QUAD_MSG)) * 4)" />
|
||||
<event name="MaliTexCacheComplexLoadCy" title="Complex texture load cycles" description="The number of cycles loading complex texture formats." units="cycles" counter="TEX_CFCH_NUM_RP_OPERATIONS" offset="93" />
|
||||
<event name="MaliTexCacheLookupCy" title="Texture cache lookup cycles" description="The number of cycles returning data from the texture cache." units="cycles" counter="TEX_TFCH_NUM_TCL_OPERATIONS" offset="92" />
|
||||
<event name="MaliTexCacheSimpleLoadCy" title="Simple texture load cycles" description="The number of cycles loading simple texture formats." units="cycles" counter="TEX_CFCH_NUM_DIRECT_PATH_OPERATIONS" offset="88" />
|
||||
<event name="MaliTexClkActiveCy" title="Texture unit clock active cycles" description="The number of cycles the texture unit was active." units="cycles" counter="TEX_TEXP_CLK_ACTIVE" offset="96" />
|
||||
<event name="MaliTexClkStarvedCy" title="Texture causing starvation cycles" description="The number of cycles the texture unit was active but did not return a texture sample." units="cycles" counter="TEX_MSGI_CLK_STARVED" offset="95" />
|
||||
<event name="MaliTexDataFetchStallCy" title="Texture fetch stall cycles" description="The number of cycles when a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles" counter="TEX_TFCH_CLK_STALLED" offset="37" />
|
||||
<event name="MaliTexDescStallCy" title="Texture descriptor stall cycles" description="The number of cycles when a quad is stalled on texture descriptor fetch." units="cycles" counter="TEX_DFCH_CLK_STALLED" offset="36" />
|
||||
<event name="MaliTexFiltIssueCy" title="Texture filtering cycles" description="The number of texture filtering issue cycles." units="cycles" counter="TEX_FILT_NUM_OPERATIONS" offset="39" />
|
||||
<event name="MaliTexFiltStallCy" title="Texture filtering stall cycles" description="The number of cycles when the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" offset="38" />
|
||||
<event name="MaliTexInBt" title="Texture message read beats" description="The number of texture request message data beats." units="beats" counter="TEX_MSGI_NUM_FLITS" offset="35" />
|
||||
<event name="MaliTexInBusUtil" title="Texture input bus utilization" description="The percentage load on the texture message input bus." units="percent" equation="((TEX_MSGI_NUM_FLITS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexIndexCy" title="Texture index calculation cycles" description="The number of cycles computing texel index values." units="cycles" counter="TEX_TIDX_NUM_OPERATIONS" offset="94" />
|
||||
<event name="MaliTexIssueCy" title="Texture unit issue cycles" description="The number of cycles the texture unit was busy." units="cycles" equation="max((TEX_FILT_NUM_OPERATIONS), (TEX_TFCH_NUM_TCL_OPERATIONS), (TEX_CFCH_NUM_DIRECT_PATH_OPERATIONS), (TEX_CFCH_NUM_RP_OPERATIONS), (TEX_MSGI_NUM_FLITS), (TEX_MSGO_NUM_FLITS), (TEX_CFCH_NUM_L1_CL_OPERATIONS), (TEX_CFCH_NUM_L1_CT_OPERATIONS), (TEX_TIDX_NUM_OPERATIONS))" />
|
||||
<event name="MaliTexL1CacheLoadCy" title="L1 texture cache load cycles" description="The number of cycles the L1 cache is being loaded." units="cycles" counter="TEX_CFCH_NUM_OUTPUT_OPERATIONS" offset="87" />
|
||||
<event name="MaliTexL1CacheLookupCy" title="L1 texture cache lookup cycles" description="The number of cycles the L1 cache is being accessed." units="cycles" counter="TEX_CFCH_NUM_L1_CT_OPERATIONS" offset="90" />
|
||||
<event name="MaliTexL1CacheOutputCy" title="L1 texture cache output cycles" description="The number of cycles the L1 cache is returning data." units="cycles" counter="TEX_CFCH_NUM_L1_CL_OPERATIONS" offset="89" />
|
||||
<event name="MaliTexOutBt" title="Texture message write beats" description="The number of texture response message data beats." units="beats" counter="TEX_MSGO_NUM_FLITS" offset="43" />
|
||||
<event name="MaliTexOutBusUtil" title="Texture output bus utilization" description="The percentage load on the texture message output bus." units="percent" equation="((TEX_MSGO_NUM_FLITS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexOutMsg" title="Texture messages" description="The number of output texture messages from the texture unit." units="issues" counter="TEX_MSGO_NUM_MSG" offset="42" />
|
||||
<event name="MaliTexOutSingleMsg" title="Texture messages with single quad" description="The number of single quad output texture messages from the texture unit." units="issues" counter="TEX_MSGO_NUM_SINGLE_QUAD_MSG" offset="91" />
|
||||
<event name="MaliTexQuads" title="Texture quads" description="The number of quad-width texture operations processed by the texture unit." units="quads" equation="((TEX_MSGO_NUM_MSG) * 2) - (TEX_MSGO_NUM_SINGLE_QUAD_MSG)" />
|
||||
<event name="MaliTexSample" title="Texture samples" description="The number of texture samples made." units="requests" equation="(((TEX_MSGO_NUM_MSG) * 2) - (TEX_MSGO_NUM_SINGLE_QUAD_MSG)) * 4" />
|
||||
<event name="MaliTexUtil" title="Texture unit utilization" description="The percentage utilization of the texturing unit." units="percent" equation="((max((TEX_FILT_NUM_OPERATIONS), (TEX_TFCH_NUM_TCL_OPERATIONS), (TEX_CFCH_NUM_DIRECT_PATH_OPERATIONS), (TEX_CFCH_NUM_RP_OPERATIONS), (TEX_MSGI_NUM_FLITS), (TEX_MSGO_NUM_FLITS), (TEX_CFCH_NUM_L1_CL_OPERATIONS), (TEX_CFCH_NUM_L1_CT_OPERATIONS), (TEX_TIDX_NUM_OPERATIONS))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliVar16IssueCy" title="16-bit interpolation issue cycles" description="The number of 16-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_16) / (4)" />
|
||||
<event name="MaliVar16IssueSlot" title="16-bit interpolation slots" description="The number of 16-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_16" offset="51" />
|
||||
<event name="MaliVar32IssueCy" title="32-bit interpolation issue cycles" description="The number of 32-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_32) / (4)" />
|
||||
<event name="MaliVar32IssueSlot" title="32-bit interpolation slots" description="The number of 32-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_32" offset="50" />
|
||||
<event name="MaliVarInstr" title="Varying unit instructions" description="The number of warp-width interpolation operations processed by the varying unit." units="requests" counter="VARY_INSTR" offset="49" />
|
||||
<event name="MaliVarIssueCy" title="Varying unit issue cycles" description="The total number of varying unit issue cycles." units="cycles" equation="((VARY_SLOT_32) / (4)) + ((VARY_SLOT_16) / (4))" />
|
||||
<event name="MaliVarUtil" title="Varying unit utilization" description="The percentage utilization of the varying unit." units="percent" equation="((((VARY_SLOT_32) / (4)) + ((VARY_SLOT_16) / (4))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
</category>
|
||||
<category name="Tiler">
|
||||
<event name="MaliGeomBackFacePrim" title="Visible back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" counter="BACK_FACING" offset="10" />
|
||||
<event name="MaliGeomFaceCullPrim" title="Facing test culled primitives" description="The number of primitives that are culled by facing tests." units="primitives" counter="PRIM_FACE_CULLED" offset="12" />
|
||||
<event name="MaliGeomFaceCullRate" title="Facing plane test cull percentage" description="The percentage of primitives culled by the facing test." units="percent" equation="((PRIM_FACE_CULLED) / ((((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE)) - (PRIM_FRUSTUM_CULLED) - (PRIM_SCISSOR_CULLED))) * 100" />
|
||||
<event name="MaliGeomFrontFacePrim" title="Visible front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" counter="FRONT_FACING" offset="9" />
|
||||
<event name="MaliGeomLinePrim" title="Line primitives" description="The number of input line primitives." units="primitives" counter="LINES" offset="7" />
|
||||
<event name="MaliGeomPlaneCullPrim" title="Frustum test culled primitives" description="The number of primitives that are culled by frustum tests." units="primitives" counter="PRIM_FRUSTUM_CULLED" offset="13" />
|
||||
<event name="MaliGeomPlaneCullRate" title="Frustum test cull percentage" description="The percentage of primitives culled by the frustum test." units="percent" equation="((PRIM_FRUSTUM_CULLED) / (((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomPointPrim" title="Point primitives" description="The number of input point primitives." units="primitives" counter="POINTS" offset="8" />
|
||||
<event name="MaliGeomPosShadPartTask" title="Partial tiler position shading requests" description="The number of partial position shading requests in the tiler geometry flow." units="requests" counter="POS_SHADER_PARTIAL_WARPS" offset="22" />
|
||||
<event name="MaliGeomPosShadTask" title="Tiler position shading requests" description="The number of position shading requests in the tiler geometry flow." units="requests" counter="IDVS_POS_SHAD_REQ" offset="21" />
|
||||
<event name="MaliGeomPosShadThread" title="Position shader thread invocations" description="The number of position shader thread invocations." units="threads" equation="(IDVS_POS_SHAD_REQ) * (16)" />
|
||||
<event name="MaliGeomPosShadThreadPerPrim" title="Position threads per input primitive" description="The number of position shader invocations per input primitive." units="threads" equation="((IDVS_POS_SHAD_REQ) * (16)) / (((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE))" />
|
||||
<event name="MaliGeomSampleCullPrim" title="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" counter="PRIM_SAT_CULLED" offset="14" />
|
||||
<event name="MaliGeomSampleCullRate" title="Sample test cull percentage" description="The percentage of primitives culled by the sample coverage test." units="percent" equation="((PRIM_SAT_CULLED) / ((((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE)) - (PRIM_FRUSTUM_CULLED) - (PRIM_SCISSOR_CULLED) - (PRIM_FACE_CULLED))) * 100" />
|
||||
<event name="MaliGeomScissorCullPrim" title="Scissor test culled primitives" description="The number of primitives that are culled by the scissor test." units="primitives" counter="PRIM_SCISSOR_CULLED" offset="70" />
|
||||
<event name="MaliGeomScissorCullRate" title="Scissor test cull percentage" description="The percentage of primitives culled by the scissor test." units="percent" equation="((PRIM_SCISSOR_CULLED) / ((((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE)) - (PRIM_FRUSTUM_CULLED))) * 100" />
|
||||
<event name="MaliGeomTotalCullPrim" title="Culled primitives" description="The number of primitives that were culled during the rendering process." units="primitives" equation="(PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)" />
|
||||
<event name="MaliGeomTotalPrim" title="Total input primitives" description="The total number of input primitives to the rendering process." units="primitives" equation="((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomTrianglePrim" title="Triangle primitives" description="The number of input triangle primitives." units="primitives" counter="TRIANGLES" offset="6" />
|
||||
<event name="MaliGeomVarShadPartTask" title="Partial tiler varying shading requests" description="The number of partial varying shading requests in the tiler geometry flow." units="requests" counter="VAR_SHADER_PARTIAL_WARPS" offset="37" />
|
||||
<event name="MaliGeomVarShadTask" title="Tiler varying shading requests" description="The number of varying shading requests in the tiler geometry flow." units="requests" counter="IDVS_VAR_SHAD_REQ" offset="36" />
|
||||
<event name="MaliGeomVarShadThread" title="Varying shader thread invocations" description="The number of varying shader thread invocations." units="threads" equation="(IDVS_VAR_SHAD_REQ) * (16)" />
|
||||
<event name="MaliGeomVisibleDVSPrim" title="Visible primitives using DVS" description="The number of primitives using DVS that are visible after culling." units="primitives" counter="PRIM_VISIBLE_DVS" offset="71" />
|
||||
<event name="MaliGeomVisiblePrim" title="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" counter="PRIM_VISIBLE" offset="11" />
|
||||
<event name="MaliGeomVisibleRate" title="Visible primitive percentage" description="The percentage of primitives that are visible after culling." units="percent" equation="((PRIM_VISIBLE) / (((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliTilerActiveCy" title="Tiler active cycles" description="The number of cycles when the tiler has a workload queued for processing." units="cycles" counter="TILER_ACTIVE" offset="4" />
|
||||
<event name="MaliTilerPosCacheHit" title="Position cache hit requests" description="The number of position lookups that result in a hit in the vertex cache." units="requests" counter="VCACHE_HIT" offset="26" />
|
||||
<event name="MaliTilerPosShadFIFOFullCy" title="Tiler position FIFO full cycles" description="The number of cycles when the tiler has a stalled position shading buffer." units="cycles" counter="IDVS_POS_FIFO_FULL" offset="24" />
|
||||
<event name="MaliTilerPosShadStallCy" title="Tiler position shading stall cycles" description="The number of cycles when the tiler has a stalled position shading request." units="cycles" counter="IDVS_POS_SHAD_STALL" offset="23" />
|
||||
<event name="MaliTilerPrimAsPosShadStallCy" title="Primitive assembly position shading stall cycles" description="The number of cycles when primitive assembly is waiting for position shading." units="cycles" counter="PRIMASSY_POS_SHADER_WAIT" offset="64" />
|
||||
<event name="MaliTilerRdBt" title="Output internal read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" counter="BUS_READ" offset="17" />
|
||||
<event name="MaliTilerUtil" title="Tiler utilization" description="The percentage of GPU active cycles when the tiler has a workload queued for processing." units="percent" equation="((TILER_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliTilerVarCacheHit" title="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" counter="IDVS_VBU_HIT" offset="34" />
|
||||
<event name="MaliTilerVarShadStallCy" title="Tiler varying shading stall cycles" description="The number of cycles when the tiler has a stalled varying shading request." units="cycles" counter="IDVS_VAR_SHAD_STALL" offset="38" />
|
||||
</category>
|
||||
</metrics>
|
||||
247
src/panfrost/perf/generated/G31.xml
Normal file
247
src/panfrost/perf/generated/G31.xml
Normal file
|
|
@ -0,0 +1,247 @@
|
|||
<!--
|
||||
Copyright (c) 2026 Arm, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
Generated from libGPUCounters @ 4ea7c0127fe2942a00e4a1123bb62c625a401f93.
|
||||
https://github.com/ARM-software/libGPUCounters
|
||||
which is:
|
||||
Copyright (c) 2023-2025 Arm Limited
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
|
||||
<metrics id="G31">
|
||||
<category name="GPU Front-end">
|
||||
<event name="MaliFragQueueActiveCy" title="Fragment queue active cycles" description="The number of cycles when work is queued for processing in the GPU fragment queue." units="cycles" counter="JS0_ACTIVE" offset="10" />
|
||||
<event name="MaliFragQueueJob" title="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" counter="JS0_JOBS" offset="8" />
|
||||
<event name="MaliFragQueueTask" title="Fragment tasks" description="The number of fragment tasks processed." units="tasks" counter="JS0_TASKS" offset="9" />
|
||||
<event name="MaliFragQueueUtil" title="Fragment queue utilization" description="The fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS0_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragQueueWaitDepCy" title="Fragment queue job dependency wait cycles" description="The number of cycles when queued fragment work is waiting for dependent work to complete." units="cycles" counter="JS0_WAIT_DEPEND" offset="14" />
|
||||
<event name="MaliFragQueueWaitFinishCy" title="Fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued fragment work to complete." units="cycles" counter="JS0_WAIT_FINISH" offset="15" />
|
||||
<event name="MaliFragQueueWaitFlushCy" title="Fragment queue cache flush wait cycles" description="The number of cycles when queued fragment work is waiting for a cache flush." units="cycles" counter="JS0_WAIT_FLUSH" offset="11" />
|
||||
<event name="MaliFragQueueWaitIssueCy" title="Fragment queue job issue wait cycles" description="The number of cycles when queued fragment work is waiting for an available processor." units="cycles" counter="JS0_WAIT_ISSUE" offset="13" />
|
||||
<event name="MaliFragQueueWaitRdCy" title="Fragment queue job descriptor read wait cycles" description="The number of cycles when queued fragment work is waiting for a descriptor load." units="cycles" counter="JS0_WAIT_READ" offset="12" />
|
||||
<event name="MaliGPUActiveCy" title="GPU active cycles" description="The number of cycles when the GPU has a workload of any type queued for processing." units="cycles" counter="GPU_ACTIVE" offset="6" />
|
||||
<event name="MaliGPUIRQActiveCy" title="GPU interrupt pending cycles" description="The number of cycles when the GPU has a pending interrupt." units="cycles" counter="IRQ_ACTIVE" offset="7" />
|
||||
<event name="MaliGPUIRQUtil" title="Interrupt pending utilization" description="The IRQ pending utilization compared against the GPU active cycles." units="percent" equation="((IRQ_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueActiveCy" title="Non-fragment queue active cycles" description="The number of cycles when work is queued in the GPU non-fragment queue." units="cycles" counter="JS1_ACTIVE" offset="18" />
|
||||
<event name="MaliNonFragQueueJob" title="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" counter="JS1_JOBS" offset="16" />
|
||||
<event name="MaliNonFragQueueTask" title="Non-fragment tasks" description="The number of non-fragment tasks processed." units="tasks" counter="JS1_TASKS" offset="17" />
|
||||
<event name="MaliNonFragQueueUtil" title="Non-fragment queue utilization" description="The non-fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS1_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueWaitDepCy" title="Non-fragment queue job dependency wait cycles" description="The number of cycles when queued non-fragment work is waiting for dependent work to complete." units="cycles" counter="JS1_WAIT_DEPEND" offset="22" />
|
||||
<event name="MaliNonFragQueueWaitFinishCy" title="Non-fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued non-fragment work to complete." units="cycles" counter="JS1_WAIT_FINISH" offset="23" />
|
||||
<event name="MaliNonFragQueueWaitFlushCy" title="Non-fragment queue cache flush wait cycles" description="The number of cycles when queued non-fragment work is waiting for a cache flush." units="cycles" counter="JS1_WAIT_FLUSH" offset="19" />
|
||||
<event name="MaliNonFragQueueWaitIssueCy" title="Non-fragment queue job issue wait cycles" description="The number of cycles when queued non-fragment work is waiting for an available processor." units="cycles" counter="JS1_WAIT_ISSUE" offset="21" />
|
||||
<event name="MaliNonFragQueueWaitRdCy" title="Non-fragment queue job descriptor read wait cycles" description="The number of cycles when queued non-fragment work is waiting for a descriptor load." units="cycles" counter="JS1_WAIT_READ" offset="20" />
|
||||
<event name="MaliResQueueActiveCy" title="Reserved active cycles" description="The number of cycles when work is queued in the GPU reserved queue." units="cycles" counter="JS2_ACTIVE" offset="26" />
|
||||
<event name="MaliResQueueJob" title="Reserved queue jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" counter="JS2_JOBS" offset="24" />
|
||||
<event name="MaliResQueueTask" title="Reserved queue tasks" description="The number of reserved tasks processed." units="tasks" counter="JS2_TASKS" offset="25" />
|
||||
<event name="MaliResQueueWaitDepCy" title="Reserved queue job dependency wait cycles" description="The number of cycles when queued reserved work is waiting for dependent work to complete." units="cycles" counter="JS2_WAIT_DEPEND" offset="30" />
|
||||
<event name="MaliResQueueWaitFinishCy" title="Reserved queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued reserved work to complete." units="cycles" counter="JS2_WAIT_FINISH" offset="31" />
|
||||
<event name="MaliResQueueWaitFlushCy" title="Reserved queue cache flush wait cycles" description="The number of cycles when queued reserved work is waiting for a cache flush." units="cycles" counter="JS2_WAIT_FLUSH" offset="27" />
|
||||
<event name="MaliResQueueWaitIssueCy" title="Reserved queue job issue wait cycles" description="The number of cycles when queued reserved work is waiting for an available processor." units="cycles" counter="JS2_WAIT_ISSUE" offset="29" />
|
||||
<event name="MaliResQueueWaitRdCy" title="Reserved queue job descriptor read wait cycles" description="The number of cycles when queued reserved work is waiting for a descriptor load." units="cycles" counter="JS2_WAIT_READ" offset="28" />
|
||||
</category>
|
||||
<category name="Memory System">
|
||||
<event name="MaliExtBusRd" title="Output external read transactions" description="The number of external read transactions." units="transactions" counter="L2_EXT_READ" offset="29" />
|
||||
<event name="MaliExtBusRdBt" title="Output external read beats" description="The number of external bus data read cycles." units="beats" counter="L2_EXT_READ_BEATS" offset="32" />
|
||||
<event name="MaliExtBusRdBy" title="Output external read bytes" description="The total output read bandwidth for the GPU." units="bytes" equation="(L2_EXT_READ_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusRdLat0" title="Output external read latency 0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" counter="L2_EXT_RRESP_0_127" offset="37" />
|
||||
<event name="MaliExtBusRdLat128" title="Output external read latency 128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" counter="L2_EXT_RRESP_128_191" offset="38" />
|
||||
<event name="MaliExtBusRdLat192" title="Output external read latency 192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" counter="L2_EXT_RRESP_192_255" offset="39" />
|
||||
<event name="MaliExtBusRdLat256" title="Output external read latency 256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" counter="L2_EXT_RRESP_256_319" offset="40" />
|
||||
<event name="MaliExtBusRdLat320" title="Output external read latency 320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" counter="L2_EXT_RRESP_320_383" offset="41" />
|
||||
<event name="MaliExtBusRdLat384" title="Output external read latency 384+ cycles" description="The number of read beats that are returned at least 384 cycles after the transaction started." units="beats" equation="(L2_EXT_READ_BEATS) - (L2_EXT_RRESP_0_127) - (L2_EXT_RRESP_128_191) - (L2_EXT_RRESP_192_255) - (L2_EXT_RRESP_256_319) - (L2_EXT_RRESP_320_383)" />
|
||||
<event name="MaliExtBusRdNoSnoop" title="Output external ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" counter="L2_EXT_READ_NOSNP" offset="30" />
|
||||
<event name="MaliExtBusRdOTQ1" title="Output external outstanding reads 0-25%" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q1" offset="34" />
|
||||
<event name="MaliExtBusRdOTQ2" title="Output external outstanding reads 25-50%" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q2" offset="35" />
|
||||
<event name="MaliExtBusRdOTQ3" title="Output external outstanding reads 50-75%" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q3" offset="36" />
|
||||
<event name="MaliExtBusRdOTQ4" title="Output external outstanding reads 75-100%" description="The number of read transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_READ) - (L2_EXT_AR_CNT_Q1) - (L2_EXT_AR_CNT_Q2) - (L2_EXT_AR_CNT_Q3)" />
|
||||
<event name="MaliExtBusRdStallCy" title="Output external read stall cycles" description="The number of cycles when a read is stalled waiting for the external bus." units="cycles" counter="L2_EXT_AR_STALL" offset="33" />
|
||||
<event name="MaliExtBusRdStallRate" title="Output external read stall percentage" description="The percentage of cycles with an external read transaction stalled." units="percent" equation="((L2_EXT_AR_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliExtBusRdUnique" title="Output external ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" counter="L2_EXT_READ_UNIQUE" offset="31" />
|
||||
<event name="MaliExtBusWr" title="Output external write transactions" description="The number of external write transactions." units="transactions" counter="L2_EXT_WRITE" offset="42" />
|
||||
<event name="MaliExtBusWrBt" title="Output external write beats" description="The number of external bus data write cycles." units="beats" counter="L2_EXT_WRITE_BEATS" offset="47" />
|
||||
<event name="MaliExtBusWrBy" title="Output external write bytes" description="The total output write bandwidth for the GPU." units="bytes" equation="(L2_EXT_WRITE_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusWrNoSnoopFull" title="Output external WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_FULL" offset="43" />
|
||||
<event name="MaliExtBusWrNoSnoopPart" title="Output external WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_PTL" offset="44" />
|
||||
<event name="MaliExtBusWrOTQ1" title="Output external outstanding writes 0-25%" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q1" offset="49" />
|
||||
<event name="MaliExtBusWrOTQ2" title="Output external outstanding writes 25-50%" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q2" offset="50" />
|
||||
<event name="MaliExtBusWrOTQ3" title="Output external outstanding writes 50-75%" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q3" offset="51" />
|
||||
<event name="MaliExtBusWrOTQ4" title="Output external outstanding writes 75-100%" description="The number of write transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_WRITE) - (L2_EXT_AW_CNT_Q1) - (L2_EXT_AW_CNT_Q2) - (L2_EXT_AW_CNT_Q3)" />
|
||||
<event name="MaliExtBusWrSnoopFull" title="Output external WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_FULL" offset="45" />
|
||||
<event name="MaliExtBusWrSnoopPart" title="Output external WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_PTL" offset="46" />
|
||||
<event name="MaliExtBusWrStallCy" title="Output external write stall cycles" description="The number of cycles when a write is stalled waiting for the external bus." units="cycles" counter="L2_EXT_W_STALL" offset="48" />
|
||||
<event name="MaliExtBusWrStallRate" title="Output external write stall percentage" description="The percentage of cycles with an external write transaction stalled." units="percent" equation="((L2_EXT_W_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliL2CacheFlush" title="L2 cache flush requests" description="The number of GPU L2 cache flushes performed." units="requests" counter="CACHE_FLUSH" offset="63" />
|
||||
<event name="MaliL2CacheIncSnp" title="Input external snoop transactions" description="The number of coherency snoops triggered by external requesters." units="transactions" counter="L2_EXT_SNOOP" offset="52" />
|
||||
<event name="MaliL2CacheIncSnpStallCy" title="Input external snoop stall cycles" description="The number of cycles when a coherency snoop triggered by an external requester is stalled." units="cycles" counter="L2_EXT_SNOOP_STALL" offset="53" />
|
||||
<event name="MaliL2CacheL1Rd" title="Output internal read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal requester." units="requests" counter="L2_RD_MSG_OUT" offset="22" />
|
||||
<event name="MaliL2CacheL1RdStallCy" title="Output internal read stall cycles" description="The number of cycles when L1 cache read requests sent by the L2 cache to an internal requester are stalled." units="cycles" counter="L2_RD_MSG_OUT_STALL" offset="23" />
|
||||
<event name="MaliL2CacheL1Wr" title="Output internal write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal requester." units="requests" counter="L2_WR_MSG_OUT" offset="24" />
|
||||
<event name="MaliL2CacheLookup" title="Any lookup requests" description="The number of L2 cache lookups performed." units="requests" counter="L2_ANY_LOOKUP" offset="25" />
|
||||
<event name="MaliL2CacheRd" title="Input internal read requests" description="The number of L2 cache read requests from internal requesters." units="requests" counter="L2_RD_MSG_IN" offset="16" />
|
||||
<event name="MaliL2CacheRdLookup" title="Read lookup requests" description="The number of L2 cache read lookups performed." units="requests" counter="L2_READ_LOOKUP" offset="26" />
|
||||
<event name="MaliL2CacheRdMissRate" title="L2 cache read miss percentage" description="The percentage of internal L2 cache reads that result in an external read." units="percent" equation="((L2_EXT_READ) / (L2_READ_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheRdStallCy" title="Input internal read stall cycles" description="The number of cycles when L2 cache read requests from internal requesters are stalled." units="cycles" counter="L2_RD_MSG_IN_STALL" offset="17" />
|
||||
<event name="MaliL2CacheSnp" title="Input internal snoop requests" description="The number of L2 snoop requests from internal requesters." units="requests" counter="L2_SNP_MSG_IN" offset="20" />
|
||||
<event name="MaliL2CacheSnpLookup" title="Input external snoop lookup requests" description="The number of coherency snoop lookups performed that were triggered by an external requester." units="requests" counter="L2_EXT_SNOOP_LOOKUP" offset="28" />
|
||||
<event name="MaliL2CacheSnpStallCy" title="Input internal snoop stall cycles" description="The number of cycles when L2 cache snoop requests from internal requesters are stalled." units="cycles" counter="L2_SNP_MSG_IN_STALL" offset="21" />
|
||||
<event name="MaliL2CacheWr" title="Input internal write requests" description="The number of L2 cache write requests from internal requesters." units="requests" counter="L2_WR_MSG_IN" offset="18" />
|
||||
<event name="MaliL2CacheWrLookup" title="Write lookup requests" description="The number of L2 cache write lookups performed." units="requests" counter="L2_WRITE_LOOKUP" offset="27" />
|
||||
<event name="MaliL2CacheWrMissRate" title="L2 cache write miss percentage" description="The percentage of internal L2 cache writes that result in an external write." units="percent" equation="((L2_EXT_WRITE) / (L2_WRITE_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheWrStallCy" title="Input internal write stall cycles" description="The number of cycles when L2 cache write requests from internal requesters are stalled." units="cycles" counter="L2_WR_MSG_IN_STALL" offset="19" />
|
||||
<event name="MaliMMUL2Hit" title="MMU L2 lookup TLB hits" description="The number of level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L2" offset="8" />
|
||||
<event name="MaliMMUL2Rd" title="MMU L2 table read requests" description="The number of level 2 translation table reads." units="requests" counter="MMU_TABLE_READS_L2" offset="6" />
|
||||
<event name="MaliMMUL3Hit" title="MMU L3 lookup TLB hits" description="The number of level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L3" offset="7" />
|
||||
<event name="MaliMMUL3Rd" title="MMU L3 table read requests" description="The number of level 3 translation table reads." units="requests" counter="MMU_TABLE_READS_L3" offset="5" />
|
||||
<event name="MaliMMULookup" title="MMU lookup requests" description="The number of main MMU address translations performed." units="requests" counter="MMU_REQUESTS" offset="4" />
|
||||
<event name="MaliMMUS2L2Hit" title="MMU stage 2 L2 lookup TLB hits" description="The number of stage 2 level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L2" offset="13" />
|
||||
<event name="MaliMMUS2L2Rd" title="MMU stage 2 L2 lookup requests" description="The number of stage 2 level 2 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L2" offset="11" />
|
||||
<event name="MaliMMUS2L3Hit" title="MMU stage 2 L3 lookup TLB hits" description="The number of stage 2 level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L3" offset="12" />
|
||||
<event name="MaliMMUS2L3Rd" title="MMU stage 2 L3 lookup requests" description="The number of stage 2 level 3 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L3" offset="10" />
|
||||
<event name="MaliMMUS2Lookup" title="MMU stage 2 lookup requests" description="The number of main MMU stage 2 address translations performed." units="requests" counter="MMU_S2_REQUESTS" offset="9" />
|
||||
</category>
|
||||
<category name="Shader Core">
|
||||
<event name="MaliALUIssueCy" title="Arithmetic unit issue cycles" description="The number of cycles the arithmetic unit was busy." units="cycles" equation="(EXEC_INSTR_COUNT)" />
|
||||
<event name="MaliALUUtil" title="Arithmetic unit utilization" description="The percentage utilization of the arithmetic unit." units="percent" equation="(((EXEC_INSTR_COUNT)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliAttrInstr" title="Attribute instructions" description="The number of instructions run by the attribute unit." units="instructions" counter="ATTR_INSTR" offset="52" />
|
||||
<event name="MaliCoreActiveCy" title="Execution core active cycles" description="The number of cycles when the shader core is processing at least one warp." units="cycles" counter="EXEC_CORE_ACTIVE" offset="26" />
|
||||
<event name="MaliCoreUtil" title="Execution core utilization" description="The utilization of the programmable shader core." units="percent" equation="((EXEC_CORE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliEngDivergedInstr" title="Diverged instructions" description="The number of instructions run per warp that have control flow divergence." units="instructions" counter="EXEC_INSTR_DIVERGED" offset="29" />
|
||||
<event name="MaliEngDivergedInstrRate" title="Warp divergence percentage" description="The percentage of instructions that have control flow divergence across the warp." units="percent" equation="((EXEC_INSTR_DIVERGED) / (EXEC_INSTR_COUNT)) * 100" />
|
||||
<event name="MaliEngInstr" title="Arithmetic instruction issue cycles" description="The number of instructions run per warp." units="instructions" counter="EXEC_INSTR_COUNT" offset="28" />
|
||||
<event name="MaliEngStarveCy" title="Execution engine starvation cycles" description="The number of cycles when no new threads are available to run." units="cycles" counter="EXEC_INSTR_STARVING" offset="30" />
|
||||
<event name="MaliFragActiveCy" title="Fragment active cycles" description="The number of cycles when the shader core is processing a fragment workload." units="cycles" counter="FRAG_ACTIVE" offset="4" />
|
||||
<event name="MaliFragEZSKillQd" title="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_KILL" offset="14" />
|
||||
<event name="MaliFragEZSKillRate" title="Early ZS killed quad percentage" description="The percentage of rasterized quads that are killed by early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSTestQd" title="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_TEST" offset="12" />
|
||||
<event name="MaliFragEZSTestRate" title="Early ZS tested quad percentage" description="The percentage of rasterized quads that were subjected to early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSUpdateQd" title="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" counter="FRAG_QUADS_EZS_UPDATE" offset="13" />
|
||||
<event name="MaliFragEZSUpdateRate" title="Early ZS updated quad percentage" description="The percentage of rasterized quads that update the framebuffer during early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_UPDATE) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragFPKActiveCy" title="Fragment pre-pipe buffer active cycles" description="The number of cycles when at least one quad is present in the pre-pipe quad queue." units="cycles" counter="FRAG_FPK_ACTIVE" offset="7" />
|
||||
<event name="MaliFragFPKBUtil" title="Fragment pre-pipe buffer utilization" description="The percentage of cycles when at least one quad is buffered for fragment shading." units="percent" equation="((FRAG_FPK_ACTIVE) / (FRAG_ACTIVE)) * 100" />
|
||||
<event name="MaliFragFPKKillQd" title="FPK HSR killed quads" description="The number of quads that are killed by hidden surface removal." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - ((FRAG_WARPS))" />
|
||||
<event name="MaliFragFPKKillRate" title="FPK HSR killed quad percentage" description="The percentage of rasterized quads that are killed by hidden surface removal." units="percent" equation="(((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - ((FRAG_WARPS))) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSKillQd" title="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" counter="FRAG_LZS_KILL" offset="16" />
|
||||
<event name="MaliFragLZSKillRate" title="Late ZS killed quad percentage" description="The percentage of rasterized quads that are killed by late depth and stencil testing." units="percent" equation="((FRAG_LZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSTestQd" title="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" counter="FRAG_LZS_TEST" offset="15" />
|
||||
<event name="MaliFragLZSTestRate" title="Late ZS tested quad percentage" description="The percentage of rasterized quads that are tested by late depth and stencil testing." units="percent" equation="((FRAG_LZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragOpaqueQd" title="Occluding quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" counter="QUAD_FPK_KILLER" offset="20" />
|
||||
<event name="MaliFragOpaqueQdRate" title="Occluding quad percentage" description="The percentage of quads that are valid occluders for hidden surface removal." units="percent" equation="((QUAD_FPK_KILLER) / ((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL))) * 100" />
|
||||
<event name="MaliFragOverdraw" title="Fragments per pixel" description="The number of fragments shaded per output pixel." units="threads" equation="((FRAG_WARPS) * (4)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliFragPartWarp" title="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps" counter="FRAG_PARTIAL_WARPS" offset="10" />
|
||||
<event name="MaliFragPartWarpRate" title="Partial coverage percentage" description="The percentage of warps that contain samples with no coverage." units="percent" equation="((FRAG_PARTIAL_WARPS) / (FRAG_WARPS)) * 100" />
|
||||
<event name="MaliFragRastPrim" title="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" counter="FRAG_PRIM_RAST" offset="6" />
|
||||
<event name="MaliFragRastQd" title="Rasterized fine quads" description="The number of fine quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_RAST" offset="11" />
|
||||
<event name="MaliFragRdPrim" title="Fragment primitives loaded" description="The number of primitives loaded from the tile list by the fragment front-end." units="primitives" counter="FRAG_PRIMITIVES" offset="5" />
|
||||
<event name="MaliFragShadedQd" title="Shaded coarse quads" description="The number of 2x2 fragment quads that are fragment shaded." units="quads" equation="(FRAG_WARPS)" />
|
||||
<event name="MaliFragThread" title="Fragment threads" description="The number of fragment threads started." units="threads" equation="(FRAG_WARPS) * (4)" />
|
||||
<event name="MaliFragThroughputCy" title="Average cycles per fragment thread" description="The average number of shader core cycles per fragment thread." units="cycles" equation="(FRAG_ACTIVE) / ((FRAG_WARPS) * (4))" />
|
||||
<event name="MaliFragTile" title="Tiles" description="The number of tiles processed by the shader core." units="tiles" counter="FRAG_PTILES" offset="18" />
|
||||
<event name="MaliFragTileKill" title="Killed unchanged tiles" description="The number of tiles killed by transaction elimination." units="tiles" counter="FRAG_TRANS_ELIM" offset="19" />
|
||||
<event name="MaliFragTileKillRate" title="Unchanged tile kill percentage" description="The percentage of tiles that are killed by transaction elimination." units="percent" equation="((FRAG_TRANS_ELIM) / (FRAG_PTILES)) * 100" />
|
||||
<event name="MaliFragTransparentQd" title="Non-occluding quads" description="The number of quads that are not eligible to be a hidden surface removal occluder." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (QUAD_FPK_KILLER)" />
|
||||
<event name="MaliFragUtil" title="Fragment utilization" description="The utilization of the shader core fragment path." units="percent" equation="((FRAG_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragWarp" title="Fragment warps" description="The number of fragment warps created." units="warps" counter="FRAG_WARPS" offset="9" />
|
||||
<event name="MaliLSAtomic" title="Load/store unit atomic issues" description="The number of load/store atomic accesses." units="cycles" counter="LS_MEM_ATOMIC" offset="48" />
|
||||
<event name="MaliLSFullRd" title="Load/store unit full read issues" description="The number of full-width load/store cache reads." units="cycles" counter="LS_MEM_READ_FULL" offset="44" />
|
||||
<event name="MaliLSFullWr" title="Load/store unit full write issues" description="The number of full-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_FULL" offset="46" />
|
||||
<event name="MaliLSIssueCy" title="Load/store unit issue cycles" description="The total number of load/store issue cycles." units="cycles" equation="((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)" />
|
||||
<event name="MaliLSPartRd" title="Load/store unit partial read issues" description="The number of partial-width load/store cache reads." units="cycles" counter="LS_MEM_READ_SHORT" offset="45" />
|
||||
<event name="MaliLSPartWr" title="Load/store unit partial write issues" description="The number of partial-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_SHORT" offset="47" />
|
||||
<event name="MaliLSRdCy" title="Load/store unit read issues" description="The total number of load/store read cycles." units="cycles" equation="(LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)" />
|
||||
<event name="MaliLSUtil" title="Load/store unit utilization" description="The percentage utilization of the load/store unit." units="percent" equation="((((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliLSWrCy" title="Load/store unit write issues" description="The total number of load/store write cycles." units="cycles" equation="(LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)" />
|
||||
<event name="MaliNonFragActiveCy" title="Non-fragment active cycles" description="The number of cycles when the shader core is processing some non-fragment workload." units="cycles" counter="COMPUTE_ACTIVE" offset="22" />
|
||||
<event name="MaliNonFragTask" title="Non-fragment core tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" counter="COMPUTE_TASKS" offset="23" />
|
||||
<event name="MaliNonFragThread" title="Non-fragment threads" description="The number of non-fragment threads started." units="threads" equation="(COMPUTE_WARPS) * (4)" />
|
||||
<event name="MaliNonFragThroughputCy" title="Average cycles per non-fragment thread" description="The average number of shader core cycles per non-fragment thread." units="cycles" equation="(COMPUTE_ACTIVE) / ((COMPUTE_WARPS) * (4))" />
|
||||
<event name="MaliNonFragUtil" title="Non-fragment utilization" description="The utilization of the shader core non-fragment path." units="percent" equation="((COMPUTE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragWarp" title="Non-fragment warps" description="The number of non-fragment warps created." units="warps" counter="COMPUTE_WARPS" offset="24" />
|
||||
<event name="MaliSCBusFFEExtRdBt" title="Fragment front-end read beats from external memory" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_FTC_EXT" offset="55" />
|
||||
<event name="MaliSCBusFFEExtRdBy" title="Fragment front-end read bytes from external memory" description="The total number of bytes read from the external memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC_EXT) * (16)" />
|
||||
<event name="MaliSCBusFFEL2RdBt" title="Fragment front-end read beats from L2 cache" description="The number of read beats received by the fixed-function fragment front-end." units="beats" counter="BEATS_RD_FTC" offset="54" />
|
||||
<event name="MaliSCBusFFEL2RdBy" title="Fragment front-end read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdBt" title="Load/store unit read beats from external memory" description="The number of read beats received by the load/store unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_LSC_EXT" offset="57" />
|
||||
<event name="MaliSCBusLSExtRdBy" title="Load/store unit read bytes from external memory" description="The total number of bytes read from the external memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC_EXT) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdByPerRd" title="Load/store unit bytes read from external memory per access cycle" description="The average number of bytes read from the external memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC_EXT) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSL2RdBt" title="Load/store unit read beats from L2 cache" description="The number of read beats received by the load/store unit." units="beats" counter="BEATS_RD_LSC" offset="56" />
|
||||
<event name="MaliSCBusLSL2RdBy" title="Load/store unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSL2RdByPerRd" title="Load/store unit bytes read from L2 per access cycle" description="The average number of bytes read from the L2 memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSOtherWrBt" title="Load/store unit other write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of any reason other than write-back." units="beats" counter="BEATS_WR_LSC_OTHER" offset="63" />
|
||||
<event name="MaliSCBusLSWBWrBt" title="Load/store unit write-back write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of write-back." units="beats" counter="BEATS_WR_LSC_WB" offset="61" />
|
||||
<event name="MaliSCBusLSWrBt" title="Load/store unit write beats to L2 memory system" description="The number of write beats sent by the load/store unit." units="beats" equation="(BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)" />
|
||||
<event name="MaliSCBusLSWrBy" title="Load/store unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the load/store unit." units="bytes" equation="((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)" />
|
||||
<event name="MaliSCBusLSWrByPerWr" title="Load/store unit bytes written to L2 per access cycle" description="The average number of bytes written to the L2 memory system by the load/store unit per write cycle." units="bytes" equation="(((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)) / ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT))" />
|
||||
<event name="MaliSCBusOtherL2RdBt" title="Miscellaneous read beats from L2 cache" description="The number of read beats received by a unit that is not specifically identified." units="beats" counter="BEATS_RD_OTHER" offset="60" />
|
||||
<event name="MaliSCBusTexExtRdBt" title="Texture unit read beats from external memory" description="The number of read beats received by the texture unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_TEX_EXT" offset="59" />
|
||||
<event name="MaliSCBusTexExtRdBy" title="Texture unit read bytes from external memory" description="The total number of bytes read from the external memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX_EXT) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdByPerRd" title="Texture unit bytes read from external memory per texture cycle" description="The average number of bytes read from the external memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX_EXT) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTexL2RdBt" title="Texture unit read beats from L2 cache" description="The number of read beats received by the texture unit." units="beats" counter="BEATS_RD_TEX" offset="58" />
|
||||
<event name="MaliSCBusTexL2RdBy" title="Texture unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX) * (16)" />
|
||||
<event name="MaliSCBusTexL2RdByPerRd" title="Texture unit bytes read from L2 per texture cycle" description="The average number of bytes read from the L2 memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTileWrBPerPx" title="Tile unit bytes written to L2 per pixel" description="The average number of bytes written to the L2 memory system by the tile unit per output pixel." units="bytes" equation="((BEATS_WR_TIB) * (16)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliSCBusTileWrBt" title="Tile unit write beats to L2 memory system" description="The number of write beats sent by the tile write-back unit." units="beats" counter="BEATS_WR_TIB" offset="62" />
|
||||
<event name="MaliSCBusTileWrBy" title="Tile unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the tile write-back unit." units="bytes" equation="(BEATS_WR_TIB) * (16)" />
|
||||
<event name="MaliTexCPI" title="Texture filtering cycles per instruction" description="The average number of texture filtering cycles per instruction." units="cycles" equation="((TEX_FILT_NUM_OPERATIONS)) / ((TEX_MSGI_NUM_QUADS) * 4)" />
|
||||
<event name="MaliTexCacheCompressFetch" title="Compressed texture line fetch requests" description="The number of texture line fetches from the L2 cache that are block compressed textures." units="issues" counter="TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED" offset="41" />
|
||||
<event name="MaliTexCacheCompressFetchRate" title="Texture data fetches from compressed lines" description="The percentage of texture line fetches that are from block compressed textures." units="percent" equation="((TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED) / (TEX_TFCH_NUM_LINES_FETCHED)) * 100" />
|
||||
<event name="MaliTexCacheFetch" title="Texture line fetch requests" description="The number of texture line fetches from the L2 cache." units="issues" counter="TEX_TFCH_NUM_LINES_FETCHED" offset="40" />
|
||||
<event name="MaliTexCacheLookup" title="Texture cache lookup requests" description="The number of texture cache lookup cycles." units="requests" counter="TEX_TFCH_NUM_OPERATIONS" offset="42" />
|
||||
<event name="MaliTexCacheUtil" title="Texture unit cache utilization" description="The percentage utilization of the texturing unit cache lookup path." units="percent" equation="((TEX_TFCH_NUM_OPERATIONS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexFiltIssueCy" title="Texture filtering cycles" description="The number of texture filtering issue cycles." units="cycles" counter="TEX_FILT_NUM_OPERATIONS" offset="43" />
|
||||
<event name="MaliTexIssueCy" title="Texture unit issue cycles" description="The number of cycles the texture unit was busy." units="cycles" equation="(TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliTexMipInstrRate" title="Texture accesses using mipmapping percentage" description="The percentage of texture operations accessing mipmapped textures." units="percent" equation="((TEX_DFCH_NUM_PASSES_MIP_MAP) / (TEX_DFCH_NUM_PASSES)) * 100" />
|
||||
<event name="MaliTexQuadPass" title="Texture quad issues" description="The number of quad-width filtering passes." units="issues" counter="TEX_DFCH_NUM_PASSES" offset="36" />
|
||||
<event name="MaliTexQuadPassDescMiss" title="Texture quad descriptor misses" description="The number of quad-width filtering passes that miss in the resource or sampler descriptor cache." units="requests" counter="TEX_DFCH_NUM_PASSES_MISS" offset="37" />
|
||||
<event name="MaliTexQuadPassMip" title="Mipmapped texture quad issues" description="The number of quad-width filtering passes that use a mipmapped texture." units="issues" counter="TEX_DFCH_NUM_PASSES_MIP_MAP" offset="38" />
|
||||
<event name="MaliTexQuadPassTri" title="Trilinear filtered texture quad issues" description="The number of quad-width filtering passes that use a trilinear filter." units="issues" counter="TEX_TIDX_NUM_SPLIT_MIP_MAP" offset="39" />
|
||||
<event name="MaliTexQuads" title="Texture quads" description="The number of quad-width texture operations processed by the texture unit." units="quads" counter="TEX_MSGI_NUM_QUADS" offset="35" />
|
||||
<event name="MaliTexSample" title="Texture samples" description="The number of texture samples made." units="requests" equation="(TEX_MSGI_NUM_QUADS) * 4" />
|
||||
<event name="MaliTexTriInstrRate" title="Texture accesses using trilinear filter percentage" description="The percentage of texture operations using trilinear filtering." units="percent" equation="((TEX_TIDX_NUM_SPLIT_MIP_MAP) / (TEX_MSGI_NUM_QUADS)) * 100" />
|
||||
<event name="MaliTexUtil" title="Texture unit utilization" description="The percentage utilization of the texturing unit." units="percent" equation="(((TEX_FILT_NUM_OPERATIONS)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliVar16IssueCy" title="16-bit interpolation issue cycles" description="The number of 16-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_16) / (1)" />
|
||||
<event name="MaliVar16IssueSlot" title="16-bit interpolation slots" description="The number of 16-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_16" offset="51" />
|
||||
<event name="MaliVar32IssueCy" title="32-bit interpolation issue cycles" description="The number of 32-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_32) / (1)" />
|
||||
<event name="MaliVar32IssueSlot" title="32-bit interpolation slots" description="The number of 32-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_32" offset="50" />
|
||||
<event name="MaliVarInstr" title="Varying unit instructions" description="The number of warp-width interpolation operations processed by the varying unit." units="requests" counter="VARY_INSTR" offset="49" />
|
||||
<event name="MaliVarIssueCy" title="Varying unit issue cycles" description="The total number of varying unit issue cycles." units="cycles" equation="((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))" />
|
||||
<event name="MaliVarUtil" title="Varying unit utilization" description="The percentage utilization of the varying unit." units="percent" equation="((((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
</category>
|
||||
<category name="Tiler">
|
||||
<event name="MaliGeomBackFacePrim" title="Visible back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" counter="BACK_FACING" offset="10" />
|
||||
<event name="MaliGeomFaceXYPlaneCullPrim" title="Facing or XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" counter="PRIM_CULLED" offset="12" />
|
||||
<event name="MaliGeomFaceXYPlaneCullRate" title="Facing or XY plane test cull percentage" description="The percentage of primitives culled by the facing or frustum XY plane tests." units="percent" equation="((PRIM_CULLED) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomFrontFacePrim" title="Visible front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" counter="FRONT_FACING" offset="9" />
|
||||
<event name="MaliGeomLinePrim" title="Line primitives" description="The number of input line primitives." units="primitives" counter="LINES" offset="7" />
|
||||
<event name="MaliGeomPointPrim" title="Point primitives" description="The number of input point primitives." units="primitives" counter="POINTS" offset="8" />
|
||||
<event name="MaliGeomPosShadTask" title="Tiler position shading requests" description="The number of position shading requests in the tiler geometry flow." units="requests" counter="IDVS_POS_SHAD_REQ" offset="21" />
|
||||
<event name="MaliGeomPosShadThread" title="Position shader thread invocations" description="The number of position shader thread invocations." units="threads" equation="(IDVS_POS_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomPosShadThreadPerPrim" title="Position threads per input primitive" description="The number of position shader invocations per input primitive." units="threads" equation="((IDVS_POS_SHAD_REQ) * (4)) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))" />
|
||||
<event name="MaliGeomSampleCullPrim" title="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" counter="PRIM_SAT_CULLED" offset="14" />
|
||||
<event name="MaliGeomSampleCullRate" title="Sample test cull percentage" description="The percentage of primitives culled by the sample coverage test." units="percent" equation="((PRIM_SAT_CULLED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED) - (PRIM_CLIPPED))) * 100" />
|
||||
<event name="MaliGeomTotalCullPrim" title="Culled primitives" description="The number of primitives that were culled during the rendering process." units="primitives" equation="(PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)" />
|
||||
<event name="MaliGeomTotalPrim" title="Total input primitives" description="The total number of input primitives to the rendering process." units="primitives" equation="((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomTrianglePrim" title="Triangle primitives" description="The number of input triangle primitives." units="primitives" counter="TRIANGLES" offset="6" />
|
||||
<event name="MaliGeomVarShadTask" title="Tiler varying shading requests" description="The number of varying shading requests in the tiler geometry flow." units="requests" counter="IDVS_VAR_SHAD_REQ" offset="37" />
|
||||
<event name="MaliGeomVarShadThread" title="Varying shader thread invocations" description="The number of varying shader thread invocations." units="threads" equation="(IDVS_VAR_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomVarShadThreadPerPrim" title="Varying threads per input primitive" description="The number of varying shader invocations per visible primitive." units="threads" equation="((IDVS_VAR_SHAD_REQ) * (4)) / (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomVisiblePrim" title="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" counter="PRIM_VISIBLE" offset="11" />
|
||||
<event name="MaliGeomVisibleRate" title="Visible primitive percentage" description="The percentage of primitives that are visible after culling." units="percent" equation="((PRIM_VISIBLE) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomZPlaneCullPrim" title="Z plane culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" counter="PRIM_CLIPPED" offset="13" />
|
||||
<event name="MaliGeomZPlaneCullRate" title="Z plane test cull percentage" description="The percentage of primitives culled by the frustum Z plane test." units="percent" equation="((PRIM_CLIPPED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED))) * 100" />
|
||||
<event name="MaliTilerActiveCy" title="Tiler active cycles" description="The number of cycles when the tiler has a workload queued for processing." units="cycles" counter="TILER_ACTIVE" offset="4" />
|
||||
<event name="MaliTilerPosCacheHit" title="Position cache hit requests" description="The number of position lookups that result in a hit in the vertex cache." units="requests" counter="VCACHE_HIT" offset="26" />
|
||||
<event name="MaliTilerPosCacheHitRate" title="Position cache hit percentage" description="The percentage hit rate of the tiler position cache." units="percent" equation="((VCACHE_HIT) / ((VCACHE_HIT) + (VCACHE_MISS))) * 100" />
|
||||
<event name="MaliTilerPosCacheMiss" title="Position cache miss requests" description="The number of position lookups that miss in the vertex cache." units="requests" counter="VCACHE_MISS" offset="27" />
|
||||
<event name="MaliTilerPosShadFIFOFullCy" title="Tiler position FIFO full cycles" description="The number of cycles when the tiler has a stalled position shading buffer." units="cycles" counter="IDVS_POS_FIFO_FULL" offset="24" />
|
||||
<event name="MaliTilerPosShadStallCy" title="Tiler position shading stall cycles" description="The number of cycles when the tiler has a stalled position shading request." units="cycles" counter="IDVS_POS_SHAD_STALL" offset="23" />
|
||||
<event name="MaliTilerRdBt" title="Output internal read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" counter="BUS_READ" offset="17" />
|
||||
<event name="MaliTilerUtil" title="Tiler utilization" description="The percentage of GPU active cycles when the tiler has a workload queued for processing." units="percent" equation="((TILER_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliTilerVarCacheHit" title="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" counter="IDVS_VBU_HIT" offset="34" />
|
||||
<event name="MaliTilerVarCacheHitRate" title="Varying cache hit percentage" description="The percentage hit rate of the tiler varying cache." units="percent" equation="((IDVS_VBU_HIT) / ((IDVS_VBU_HIT) + (IDVS_VBU_MISS))) * 100" />
|
||||
<event name="MaliTilerVarCacheMiss" title="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" counter="IDVS_VBU_MISS" offset="35" />
|
||||
<event name="MaliTilerVarShadStallCy" title="Tiler varying shading stall cycles" description="The number of cycles when the tiler has a stalled varying shading request." units="cycles" counter="IDVS_VAR_SHAD_STALL" offset="38" />
|
||||
<event name="MaliTilerWrBt" title="Internal write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" counter="BUS_WRITE" offset="19" />
|
||||
</category>
|
||||
</metrics>
|
||||
247
src/panfrost/perf/generated/G51.xml
Normal file
247
src/panfrost/perf/generated/G51.xml
Normal file
|
|
@ -0,0 +1,247 @@
|
|||
<!--
|
||||
Copyright (c) 2026 Arm, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
Generated from libGPUCounters @ 4ea7c0127fe2942a00e4a1123bb62c625a401f93.
|
||||
https://github.com/ARM-software/libGPUCounters
|
||||
which is:
|
||||
Copyright (c) 2023-2025 Arm Limited
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
|
||||
<metrics id="G51">
|
||||
<category name="GPU Front-end">
|
||||
<event name="MaliFragQueueActiveCy" title="Fragment queue active cycles" description="The number of cycles when work is queued for processing in the GPU fragment queue." units="cycles" counter="JS0_ACTIVE" offset="10" />
|
||||
<event name="MaliFragQueueJob" title="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" counter="JS0_JOBS" offset="8" />
|
||||
<event name="MaliFragQueueTask" title="Fragment tasks" description="The number of fragment tasks processed." units="tasks" counter="JS0_TASKS" offset="9" />
|
||||
<event name="MaliFragQueueUtil" title="Fragment queue utilization" description="The fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS0_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragQueueWaitDepCy" title="Fragment queue job dependency wait cycles" description="The number of cycles when queued fragment work is waiting for dependent work to complete." units="cycles" counter="JS0_WAIT_DEPEND" offset="14" />
|
||||
<event name="MaliFragQueueWaitFinishCy" title="Fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued fragment work to complete." units="cycles" counter="JS0_WAIT_FINISH" offset="15" />
|
||||
<event name="MaliFragQueueWaitFlushCy" title="Fragment queue cache flush wait cycles" description="The number of cycles when queued fragment work is waiting for a cache flush." units="cycles" counter="JS0_WAIT_FLUSH" offset="11" />
|
||||
<event name="MaliFragQueueWaitIssueCy" title="Fragment queue job issue wait cycles" description="The number of cycles when queued fragment work is waiting for an available processor." units="cycles" counter="JS0_WAIT_ISSUE" offset="13" />
|
||||
<event name="MaliFragQueueWaitRdCy" title="Fragment queue job descriptor read wait cycles" description="The number of cycles when queued fragment work is waiting for a descriptor load." units="cycles" counter="JS0_WAIT_READ" offset="12" />
|
||||
<event name="MaliGPUActiveCy" title="GPU active cycles" description="The number of cycles when the GPU has a workload of any type queued for processing." units="cycles" counter="GPU_ACTIVE" offset="6" />
|
||||
<event name="MaliGPUIRQActiveCy" title="GPU interrupt pending cycles" description="The number of cycles when the GPU has a pending interrupt." units="cycles" counter="IRQ_ACTIVE" offset="7" />
|
||||
<event name="MaliGPUIRQUtil" title="Interrupt pending utilization" description="The IRQ pending utilization compared against the GPU active cycles." units="percent" equation="((IRQ_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueActiveCy" title="Non-fragment queue active cycles" description="The number of cycles when work is queued in the GPU non-fragment queue." units="cycles" counter="JS1_ACTIVE" offset="18" />
|
||||
<event name="MaliNonFragQueueJob" title="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" counter="JS1_JOBS" offset="16" />
|
||||
<event name="MaliNonFragQueueTask" title="Non-fragment tasks" description="The number of non-fragment tasks processed." units="tasks" counter="JS1_TASKS" offset="17" />
|
||||
<event name="MaliNonFragQueueUtil" title="Non-fragment queue utilization" description="The non-fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS1_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueWaitDepCy" title="Non-fragment queue job dependency wait cycles" description="The number of cycles when queued non-fragment work is waiting for dependent work to complete." units="cycles" counter="JS1_WAIT_DEPEND" offset="22" />
|
||||
<event name="MaliNonFragQueueWaitFinishCy" title="Non-fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued non-fragment work to complete." units="cycles" counter="JS1_WAIT_FINISH" offset="23" />
|
||||
<event name="MaliNonFragQueueWaitFlushCy" title="Non-fragment queue cache flush wait cycles" description="The number of cycles when queued non-fragment work is waiting for a cache flush." units="cycles" counter="JS1_WAIT_FLUSH" offset="19" />
|
||||
<event name="MaliNonFragQueueWaitIssueCy" title="Non-fragment queue job issue wait cycles" description="The number of cycles when queued non-fragment work is waiting for an available processor." units="cycles" counter="JS1_WAIT_ISSUE" offset="21" />
|
||||
<event name="MaliNonFragQueueWaitRdCy" title="Non-fragment queue job descriptor read wait cycles" description="The number of cycles when queued non-fragment work is waiting for a descriptor load." units="cycles" counter="JS1_WAIT_READ" offset="20" />
|
||||
<event name="MaliResQueueActiveCy" title="Reserved active cycles" description="The number of cycles when work is queued in the GPU reserved queue." units="cycles" counter="JS2_ACTIVE" offset="26" />
|
||||
<event name="MaliResQueueJob" title="Reserved queue jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" counter="JS2_JOBS" offset="24" />
|
||||
<event name="MaliResQueueTask" title="Reserved queue tasks" description="The number of reserved tasks processed." units="tasks" counter="JS2_TASKS" offset="25" />
|
||||
<event name="MaliResQueueWaitDepCy" title="Reserved queue job dependency wait cycles" description="The number of cycles when queued reserved work is waiting for dependent work to complete." units="cycles" counter="JS2_WAIT_DEPEND" offset="30" />
|
||||
<event name="MaliResQueueWaitFinishCy" title="Reserved queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued reserved work to complete." units="cycles" counter="JS2_WAIT_FINISH" offset="31" />
|
||||
<event name="MaliResQueueWaitFlushCy" title="Reserved queue cache flush wait cycles" description="The number of cycles when queued reserved work is waiting for a cache flush." units="cycles" counter="JS2_WAIT_FLUSH" offset="27" />
|
||||
<event name="MaliResQueueWaitIssueCy" title="Reserved queue job issue wait cycles" description="The number of cycles when queued reserved work is waiting for an available processor." units="cycles" counter="JS2_WAIT_ISSUE" offset="29" />
|
||||
<event name="MaliResQueueWaitRdCy" title="Reserved queue job descriptor read wait cycles" description="The number of cycles when queued reserved work is waiting for a descriptor load." units="cycles" counter="JS2_WAIT_READ" offset="28" />
|
||||
</category>
|
||||
<category name="Memory System">
|
||||
<event name="MaliExtBusRd" title="Output external read transactions" description="The number of external read transactions." units="transactions" counter="L2_EXT_READ" offset="29" />
|
||||
<event name="MaliExtBusRdBt" title="Output external read beats" description="The number of external bus data read cycles." units="beats" counter="L2_EXT_READ_BEATS" offset="32" />
|
||||
<event name="MaliExtBusRdBy" title="Output external read bytes" description="The total output read bandwidth for the GPU." units="bytes" equation="(L2_EXT_READ_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusRdLat0" title="Output external read latency 0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" counter="L2_EXT_RRESP_0_127" offset="37" />
|
||||
<event name="MaliExtBusRdLat128" title="Output external read latency 128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" counter="L2_EXT_RRESP_128_191" offset="38" />
|
||||
<event name="MaliExtBusRdLat192" title="Output external read latency 192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" counter="L2_EXT_RRESP_192_255" offset="39" />
|
||||
<event name="MaliExtBusRdLat256" title="Output external read latency 256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" counter="L2_EXT_RRESP_256_319" offset="40" />
|
||||
<event name="MaliExtBusRdLat320" title="Output external read latency 320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" counter="L2_EXT_RRESP_320_383" offset="41" />
|
||||
<event name="MaliExtBusRdLat384" title="Output external read latency 384+ cycles" description="The number of read beats that are returned at least 384 cycles after the transaction started." units="beats" equation="(L2_EXT_READ_BEATS) - (L2_EXT_RRESP_0_127) - (L2_EXT_RRESP_128_191) - (L2_EXT_RRESP_192_255) - (L2_EXT_RRESP_256_319) - (L2_EXT_RRESP_320_383)" />
|
||||
<event name="MaliExtBusRdNoSnoop" title="Output external ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" counter="L2_EXT_READ_NOSNP" offset="30" />
|
||||
<event name="MaliExtBusRdOTQ1" title="Output external outstanding reads 0-25%" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q1" offset="34" />
|
||||
<event name="MaliExtBusRdOTQ2" title="Output external outstanding reads 25-50%" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q2" offset="35" />
|
||||
<event name="MaliExtBusRdOTQ3" title="Output external outstanding reads 50-75%" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q3" offset="36" />
|
||||
<event name="MaliExtBusRdOTQ4" title="Output external outstanding reads 75-100%" description="The number of read transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_READ) - (L2_EXT_AR_CNT_Q1) - (L2_EXT_AR_CNT_Q2) - (L2_EXT_AR_CNT_Q3)" />
|
||||
<event name="MaliExtBusRdStallCy" title="Output external read stall cycles" description="The number of cycles when a read is stalled waiting for the external bus." units="cycles" counter="L2_EXT_AR_STALL" offset="33" />
|
||||
<event name="MaliExtBusRdStallRate" title="Output external read stall percentage" description="The percentage of cycles with an external read transaction stalled." units="percent" equation="((L2_EXT_AR_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliExtBusRdUnique" title="Output external ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" counter="L2_EXT_READ_UNIQUE" offset="31" />
|
||||
<event name="MaliExtBusWr" title="Output external write transactions" description="The number of external write transactions." units="transactions" counter="L2_EXT_WRITE" offset="42" />
|
||||
<event name="MaliExtBusWrBt" title="Output external write beats" description="The number of external bus data write cycles." units="beats" counter="L2_EXT_WRITE_BEATS" offset="47" />
|
||||
<event name="MaliExtBusWrBy" title="Output external write bytes" description="The total output write bandwidth for the GPU." units="bytes" equation="(L2_EXT_WRITE_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusWrNoSnoopFull" title="Output external WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_FULL" offset="43" />
|
||||
<event name="MaliExtBusWrNoSnoopPart" title="Output external WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_PTL" offset="44" />
|
||||
<event name="MaliExtBusWrOTQ1" title="Output external outstanding writes 0-25%" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q1" offset="49" />
|
||||
<event name="MaliExtBusWrOTQ2" title="Output external outstanding writes 25-50%" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q2" offset="50" />
|
||||
<event name="MaliExtBusWrOTQ3" title="Output external outstanding writes 50-75%" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q3" offset="51" />
|
||||
<event name="MaliExtBusWrOTQ4" title="Output external outstanding writes 75-100%" description="The number of write transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_WRITE) - (L2_EXT_AW_CNT_Q1) - (L2_EXT_AW_CNT_Q2) - (L2_EXT_AW_CNT_Q3)" />
|
||||
<event name="MaliExtBusWrSnoopFull" title="Output external WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_FULL" offset="45" />
|
||||
<event name="MaliExtBusWrSnoopPart" title="Output external WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_PTL" offset="46" />
|
||||
<event name="MaliExtBusWrStallCy" title="Output external write stall cycles" description="The number of cycles when a write is stalled waiting for the external bus." units="cycles" counter="L2_EXT_W_STALL" offset="48" />
|
||||
<event name="MaliExtBusWrStallRate" title="Output external write stall percentage" description="The percentage of cycles with an external write transaction stalled." units="percent" equation="((L2_EXT_W_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliL2CacheFlush" title="L2 cache flush requests" description="The number of GPU L2 cache flushes performed." units="requests" counter="CACHE_FLUSH" offset="63" />
|
||||
<event name="MaliL2CacheIncSnp" title="Input external snoop transactions" description="The number of coherency snoops triggered by external requesters." units="transactions" counter="L2_EXT_SNOOP" offset="52" />
|
||||
<event name="MaliL2CacheIncSnpStallCy" title="Input external snoop stall cycles" description="The number of cycles when a coherency snoop triggered by external requester is stalled." units="cycles" counter="L2_EXT_SNOOP_STALL" offset="53" />
|
||||
<event name="MaliL2CacheL1Rd" title="Output internal read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal requester." units="requests" counter="L2_RD_MSG_OUT" offset="22" />
|
||||
<event name="MaliL2CacheL1RdStallCy" title="Output internal read stall cycles" description="The number of cycles when L1 cache read requests sent by the L2 cache to an internal requester are stalled." units="cycles" counter="L2_RD_MSG_OUT_STALL" offset="23" />
|
||||
<event name="MaliL2CacheL1Wr" title="Output internal write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal requester." units="requests" counter="L2_WR_MSG_OUT" offset="24" />
|
||||
<event name="MaliL2CacheLookup" title="Any lookup requests" description="The number of L2 cache lookups performed." units="requests" counter="L2_ANY_LOOKUP" offset="25" />
|
||||
<event name="MaliL2CacheRd" title="Input internal read requests" description="The number of L2 cache read requests from internal requesters." units="requests" counter="L2_RD_MSG_IN" offset="16" />
|
||||
<event name="MaliL2CacheRdLookup" title="Read lookup requests" description="The number of L2 cache read lookups performed." units="requests" counter="L2_READ_LOOKUP" offset="26" />
|
||||
<event name="MaliL2CacheRdMissRate" title="L2 cache read miss percentage" description="The percentage of internal L2 cache reads that result in an external read." units="percent" equation="((L2_EXT_READ) / (L2_READ_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheRdStallCy" title="Input internal read stall cycles" description="The number of cycles L2 cache read requests from internal requesters are stalled." units="cycles" counter="L2_RD_MSG_IN_STALL" offset="17" />
|
||||
<event name="MaliL2CacheSnp" title="Input internal snoop requests" description="The number of L2 snoop requests from internal requesters." units="requests" counter="L2_SNP_MSG_IN" offset="20" />
|
||||
<event name="MaliL2CacheSnpLookup" title="Input external snoop lookup requests" description="The number of coherency snoop lookups performed that were triggered by an external requester." units="requests" counter="L2_EXT_SNOOP_LOOKUP" offset="28" />
|
||||
<event name="MaliL2CacheSnpStallCy" title="Input internal snoop stall cycles" description="The number of cycles when L2 cache snoop requests from internal requesters are stalled." units="cycles" counter="L2_SNP_MSG_IN_STALL" offset="21" />
|
||||
<event name="MaliL2CacheWr" title="Input internal write requests" description="The number of L2 cache write requests from internal requesters." units="requests" counter="L2_WR_MSG_IN" offset="18" />
|
||||
<event name="MaliL2CacheWrLookup" title="Write lookup requests" description="The number of L2 cache write lookups performed." units="requests" counter="L2_WRITE_LOOKUP" offset="27" />
|
||||
<event name="MaliL2CacheWrMissRate" title="L2 cache write miss percentage" description="The percentage of internal L2 cache writes that result in an external write." units="percent" equation="((L2_EXT_WRITE) / (L2_WRITE_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheWrStallCy" title="Input internal write stall cycles" description="The number of cycles when L2 cache write requests from internal requesters are stalled." units="cycles" counter="L2_WR_MSG_IN_STALL" offset="19" />
|
||||
<event name="MaliMMUL2Hit" title="MMU L2 lookup TLB hits" description="The number of level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L2" offset="8" />
|
||||
<event name="MaliMMUL2Rd" title="MMU L2 table read requests" description="The number of level 2 translation table reads." units="requests" counter="MMU_TABLE_READS_L2" offset="6" />
|
||||
<event name="MaliMMUL3Hit" title="MMU L3 lookup TLB hits" description="The number of level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L3" offset="7" />
|
||||
<event name="MaliMMUL3Rd" title="MMU L3 table read requests" description="The number of level 3 translation table reads." units="requests" counter="MMU_TABLE_READS_L3" offset="5" />
|
||||
<event name="MaliMMULookup" title="MMU lookup requests" description="The number of main MMU address translations performed." units="requests" counter="MMU_REQUESTS" offset="4" />
|
||||
<event name="MaliMMUS2L2Hit" title="MMU stage 2 L2 lookup TLB hits" description="The number of stage 2 level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L2" offset="13" />
|
||||
<event name="MaliMMUS2L2Rd" title="MMU stage 2 L2 lookup requests" description="The number of stage 2 level 2 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L2" offset="11" />
|
||||
<event name="MaliMMUS2L3Hit" title="MMU stage 2 L3 lookup TLB hits" description="The number of stage 2 level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L3" offset="12" />
|
||||
<event name="MaliMMUS2L3Rd" title="MMU stage 2 L3 lookup requests" description="The number of stage 2 level 3 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L3" offset="10" />
|
||||
<event name="MaliMMUS2Lookup" title="MMU stage 2 lookup requests" description="The number of main MMU stage 2 address translations performed." units="requests" counter="MMU_S2_REQUESTS" offset="9" />
|
||||
</category>
|
||||
<category name="Shader Core">
|
||||
<event name="MaliALUIssueCy" title="Arithmetic unit issue cycles" description="The number of cycles the arithmetic unit was busy." units="cycles" equation="(EXEC_INSTR_COUNT)" />
|
||||
<event name="MaliALUUtil" title="Arithmetic unit utilization" description="The percentage utilization of the arithmetic unit." units="percent" equation="(((EXEC_INSTR_COUNT)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliAttrInstr" title="Attribute instructions" description="The number of instructions run by the attribute unit." units="instructions" counter="ATTR_INSTR" offset="52" />
|
||||
<event name="MaliCoreActiveCy" title="Execution core active cycles" description="The number of cycles when the shader core is processing at least one warp." units="cycles" counter="EXEC_CORE_ACTIVE" offset="26" />
|
||||
<event name="MaliCoreUtil" title="Execution core utilization" description="The utilization of the programmable shader core." units="percent" equation="((EXEC_CORE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliEngDivergedInstr" title="Diverged instructions" description="The number of instructions run per warp that have control flow divergence." units="instructions" counter="EXEC_INSTR_DIVERGED" offset="29" />
|
||||
<event name="MaliEngDivergedInstrRate" title="Warp divergence percentage" description="The percentage of instructions that have control flow divergence across the warp." units="percent" equation="((EXEC_INSTR_DIVERGED) / (EXEC_INSTR_COUNT)) * 100" />
|
||||
<event name="MaliEngInstr" title="Arithmetic instruction issue cycles" description="The number of instructions run per warp." units="instructions" counter="EXEC_INSTR_COUNT" offset="28" />
|
||||
<event name="MaliEngStarveCy" title="Execution engine starvation cycles" description="The number of cycles when no new threads are available to run." units="cycles" counter="EXEC_INSTR_STARVING" offset="30" />
|
||||
<event name="MaliFragActiveCy" title="Fragment active cycles" description="The number of cycles when the shader core is processing a fragment workload." units="cycles" counter="FRAG_ACTIVE" offset="4" />
|
||||
<event name="MaliFragEZSKillQd" title="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_KILL" offset="14" />
|
||||
<event name="MaliFragEZSKillRate" title="Early ZS killed quad percentage" description="The percentage of rasterized quads that are killed by early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSTestQd" title="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_TEST" offset="12" />
|
||||
<event name="MaliFragEZSTestRate" title="Early ZS tested quad percentage" description="The percentage of rasterized quads that were subjected to early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSUpdateQd" title="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" counter="FRAG_QUADS_EZS_UPDATE" offset="13" />
|
||||
<event name="MaliFragEZSUpdateRate" title="Early ZS updated quad percentage" description="The percentage of rasterized quads that update the framebuffer during early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_UPDATE) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragFPKActiveCy" title="Fragment pre-pipe buffer active cycles" description="The number of cycles when at least one quad is present in the pre-pipe quad queue." units="cycles" counter="FRAG_FPK_ACTIVE" offset="7" />
|
||||
<event name="MaliFragFPKBUtil" title="Fragment pre-pipe buffer utilization" description="The percentage of cycles when at least one quad is buffered for fragment shading." units="percent" equation="((FRAG_FPK_ACTIVE) / (FRAG_ACTIVE)) * 100" />
|
||||
<event name="MaliFragFPKKillQd" title="FPK HSR killed quads" description="The number of quads that are killed by hidden surface removal." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - ((FRAG_WARPS))" />
|
||||
<event name="MaliFragFPKKillRate" title="FPK HSR killed quad percentage" description="The percentage of rasterized quads that are killed by hidden surface removal." units="percent" equation="(((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - ((FRAG_WARPS))) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSKillQd" title="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" counter="FRAG_LZS_KILL" offset="16" />
|
||||
<event name="MaliFragLZSKillRate" title="Late ZS killed quad percentage" description="The percentage of rasterized quads that are killed by late depth and stencil testing." units="percent" equation="((FRAG_LZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSTestQd" title="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" counter="FRAG_LZS_TEST" offset="15" />
|
||||
<event name="MaliFragLZSTestRate" title="Late ZS tested quad percentage" description="The percentage of rasterized quads that are tested by late depth and stencil testing." units="percent" equation="((FRAG_LZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragOpaqueQd" title="Occluding quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" counter="QUAD_FPK_KILLER" offset="20" />
|
||||
<event name="MaliFragOpaqueQdRate" title="Occluding quad percentage" description="The percentage of quads that are valid occluders for hidden surface removal." units="percent" equation="((QUAD_FPK_KILLER) / ((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL))) * 100" />
|
||||
<event name="MaliFragOverdraw" title="Fragments per pixel" description="The number of fragments shaded per output pixel." units="threads" equation="((FRAG_WARPS) * (4)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliFragPartWarp" title="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps" counter="FRAG_PARTIAL_WARPS" offset="10" />
|
||||
<event name="MaliFragPartWarpRate" title="Partial coverage percentage" description="The percentage of warps that contain samples with no coverage." units="percent" equation="((FRAG_PARTIAL_WARPS) / (FRAG_WARPS)) * 100" />
|
||||
<event name="MaliFragRastPrim" title="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" counter="FRAG_PRIM_RAST" offset="6" />
|
||||
<event name="MaliFragRastQd" title="Rasterized fine quads" description="The number of fine quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_RAST" offset="11" />
|
||||
<event name="MaliFragRdPrim" title="Fragment primitives loaded" description="The number of primitives loaded from the tile list by the fragment front-end." units="primitives" counter="FRAG_PRIMITIVES" offset="5" />
|
||||
<event name="MaliFragShadedQd" title="Shaded coarse quads" description="The number of 2x2 fragment quads that are fragment shaded." units="quads" equation="(FRAG_WARPS)" />
|
||||
<event name="MaliFragThread" title="Fragment threads" description="The number of fragment threads started." units="threads" equation="(FRAG_WARPS) * (4)" />
|
||||
<event name="MaliFragThroughputCy" title="Average cycles per fragment thread" description="The average number of shader core cycles per fragment thread." units="cycles" equation="(FRAG_ACTIVE) / ((FRAG_WARPS) * (4))" />
|
||||
<event name="MaliFragTile" title="Tiles" description="The number of tiles processed by the shader core." units="tiles" counter="FRAG_PTILES" offset="18" />
|
||||
<event name="MaliFragTileKill" title="Killed unchanged tiles" description="The number of tiles killed by transaction elimination." units="tiles" counter="FRAG_TRANS_ELIM" offset="19" />
|
||||
<event name="MaliFragTileKillRate" title="Unchanged tile kill percentage" description="The percentage of tiles that are killed by transaction elimination." units="percent" equation="((FRAG_TRANS_ELIM) / (FRAG_PTILES)) * 100" />
|
||||
<event name="MaliFragTransparentQd" title="Non-occluding quads" description="The number of quads that are not eligible to be a hidden surface removal occluder." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (QUAD_FPK_KILLER)" />
|
||||
<event name="MaliFragUtil" title="Fragment utilization" description="The utilization of the shader core fragment path." units="percent" equation="((FRAG_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragWarp" title="Fragment warps" description="The number of fragment warps created." units="warps" counter="FRAG_WARPS" offset="9" />
|
||||
<event name="MaliLSAtomic" title="Load/store unit atomic issues" description="The number of load/store atomic accesses." units="cycles" counter="LS_MEM_ATOMIC" offset="48" />
|
||||
<event name="MaliLSFullRd" title="Load/store unit full read issues" description="The number of full-width load/store cache reads." units="cycles" counter="LS_MEM_READ_FULL" offset="44" />
|
||||
<event name="MaliLSFullWr" title="Load/store unit full write issues" description="The number of full-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_FULL" offset="46" />
|
||||
<event name="MaliLSIssueCy" title="Load/store unit issue cycles" description="The total number of load/store issue cycles." units="cycles" equation="((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)" />
|
||||
<event name="MaliLSPartRd" title="Load/store unit partial read issues" description="The number of partial-width load/store cache reads." units="cycles" counter="LS_MEM_READ_SHORT" offset="45" />
|
||||
<event name="MaliLSPartWr" title="Load/store unit partial write issues" description="The number of partial-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_SHORT" offset="47" />
|
||||
<event name="MaliLSRdCy" title="Load/store unit read issues" description="The total number of load/store read cycles." units="cycles" equation="(LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)" />
|
||||
<event name="MaliLSUtil" title="Load/store unit utilization" description="The percentage utilization of the load/store unit." units="percent" equation="((((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliLSWrCy" title="Load/store unit write issues" description="The total number of load/store write cycles." units="cycles" equation="(LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)" />
|
||||
<event name="MaliNonFragActiveCy" title="Non-fragment active cycles" description="The number of cycles when the shader core is processing some non-fragment workload." units="cycles" counter="COMPUTE_ACTIVE" offset="22" />
|
||||
<event name="MaliNonFragTask" title="Non-fragment core tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" counter="COMPUTE_TASKS" offset="23" />
|
||||
<event name="MaliNonFragThread" title="Non-fragment threads" description="The number of non-fragment threads started." units="threads" equation="(COMPUTE_WARPS) * (4)" />
|
||||
<event name="MaliNonFragThroughputCy" title="Average cycles per non-fragment thread" description="The average number of shader core cycles per non-fragment thread." units="cycles" equation="(COMPUTE_ACTIVE) / ((COMPUTE_WARPS) * (4))" />
|
||||
<event name="MaliNonFragUtil" title="Non-fragment utilization" description="The utilization of the shader core non-fragment path." units="percent" equation="((COMPUTE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragWarp" title="Non-fragment warps" description="The number of non-fragment warps created." units="warps" counter="COMPUTE_WARPS" offset="24" />
|
||||
<event name="MaliSCBusFFEExtRdBt" title="Fragment front-end read beats from external memory" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_FTC_EXT" offset="55" />
|
||||
<event name="MaliSCBusFFEExtRdBy" title="Fragment front-end read bytes from external memory" description="The total number of bytes read from the external memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC_EXT) * (16)" />
|
||||
<event name="MaliSCBusFFEL2RdBt" title="Fragment front-end read beats from L2 cache" description="The number of read beats received by the fixed-function fragment front-end." units="beats" counter="BEATS_RD_FTC" offset="54" />
|
||||
<event name="MaliSCBusFFEL2RdBy" title="Fragment front-end read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdBt" title="Load/store unit read beats from external memory" description="The number of read beats received by the load/store unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_LSC_EXT" offset="57" />
|
||||
<event name="MaliSCBusLSExtRdBy" title="Load/store unit read bytes from external memory" description="The total number of bytes read from the external memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC_EXT) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdByPerRd" title="Load/store unit bytes read from external memory per access cycle" description="The average number of bytes read from the external memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC_EXT) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSL2RdBt" title="Load/store unit read beats from L2 cache" description="The number of read beats received by the load/store unit." units="beats" counter="BEATS_RD_LSC" offset="56" />
|
||||
<event name="MaliSCBusLSL2RdBy" title="Load/store unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSL2RdByPerRd" title="Load/store unit bytes read from L2 per access cycle" description="The average number of bytes read from the L2 memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSOtherWrBt" title="Load/store unit other write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of any reason other than write-back." units="beats" counter="BEATS_WR_LSC_OTHER" offset="61" />
|
||||
<event name="MaliSCBusLSWBWrBt" title="Load/store unit write-back write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of write-back." units="beats" counter="BEATS_WR_LSC_WB" offset="63" />
|
||||
<event name="MaliSCBusLSWrBt" title="Load/store unit write beats to L2 memory system" description="The number of write beats sent by the load/store unit." units="beats" equation="(BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)" />
|
||||
<event name="MaliSCBusLSWrBy" title="Load/store unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the load/store unit." units="bytes" equation="((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)" />
|
||||
<event name="MaliSCBusLSWrByPerWr" title="Load/store unit bytes written to L2 per access cycle" description="The average number of bytes written to the L2 memory system by the load/store unit per write cycle." units="bytes" equation="(((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)) / ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT))" />
|
||||
<event name="MaliSCBusOtherL2RdBt" title="Miscellaneous read beats from L2 cache" description="The number of read beats received by a unit that is not specifically identified." units="beats" counter="BEATS_RD_OTHER" offset="60" />
|
||||
<event name="MaliSCBusTexExtRdBt" title="Texture unit read beats from external memory" description="The number of read beats received by the texture unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_TEX_EXT" offset="59" />
|
||||
<event name="MaliSCBusTexExtRdBy" title="Texture unit read bytes from external memory" description="The total number of bytes read from the external memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX_EXT) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdByPerRd" title="Texture unit bytes read from external memory per texture cycle" description="The average number of bytes read from the external memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX_EXT) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTexL2RdBt" title="Texture unit read beats from L2 cache" description="The number of read beats received by the texture unit." units="beats" counter="BEATS_RD_TEX" offset="58" />
|
||||
<event name="MaliSCBusTexL2RdBy" title="Texture unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX) * (16)" />
|
||||
<event name="MaliSCBusTexL2RdByPerRd" title="Texture unit bytes read from L2 per texture cycle" description="The average number of bytes read from the L2 memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTileWrBPerPx" title="Tile unit bytes written to L2 per pixel" description="The average number of bytes written to the L2 memory system by the tile unit per output pixel." units="bytes" equation="((BEATS_WR_TIB) * (16)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliSCBusTileWrBt" title="Tile unit write beats to L2 memory system" description="The number of write beats sent by the tile write-back unit." units="beats" counter="BEATS_WR_TIB" offset="62" />
|
||||
<event name="MaliSCBusTileWrBy" title="Tile unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the tile write-back unit." units="bytes" equation="(BEATS_WR_TIB) * (16)" />
|
||||
<event name="MaliTexCPI" title="Texture filtering cycles per instruction" description="The average number of texture filtering cycles per instruction." units="cycles" equation="((TEX_FILT_NUM_OPERATIONS)) / ((TEX_MSGI_NUM_QUADS) * 4)" />
|
||||
<event name="MaliTexCacheCompressFetch" title="Compressed texture line fetch requests" description="The number of texture line fetches from the L2 cache that are block compressed textures." units="issues" counter="TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED" offset="41" />
|
||||
<event name="MaliTexCacheCompressFetchRate" title="Texture data fetches from compressed lines" description="The percentage of texture line fetches that are from block compressed textures." units="percent" equation="((TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED) / (TEX_TFCH_NUM_LINES_FETCHED)) * 100" />
|
||||
<event name="MaliTexCacheFetch" title="Texture line fetch requests" description="The number of texture line fetches from the L2 cache." units="issues" counter="TEX_TFCH_NUM_LINES_FETCHED" offset="40" />
|
||||
<event name="MaliTexCacheLookup" title="Texture cache lookup requests" description="The number of texture cache lookup cycles." units="requests" counter="TEX_TFCH_NUM_OPERATIONS" offset="42" />
|
||||
<event name="MaliTexCacheUtil" title="Texture unit cache utilization" description="The percentage utilization of the texturing unit cache lookup path." units="percent" equation="((TEX_TFCH_NUM_OPERATIONS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexFiltIssueCy" title="Texture filtering cycles" description="The number of texture filtering issue cycles." units="cycles" counter="TEX_FILT_NUM_OPERATIONS" offset="43" />
|
||||
<event name="MaliTexIssueCy" title="Texture unit issue cycles" description="The number of cycles the texture unit was busy." units="cycles" equation="(TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliTexMipInstrRate" title="Texture accesses using mipmapping percentage" description="The percentage of texture operations accessing mipmapped textures." units="percent" equation="((TEX_DFCH_NUM_PASSES_MIP_MAP) / (TEX_DFCH_NUM_PASSES)) * 100" />
|
||||
<event name="MaliTexQuadPass" title="Texture quad issues" description="The number of quad-width filtering passes." units="issues" counter="TEX_DFCH_NUM_PASSES" offset="36" />
|
||||
<event name="MaliTexQuadPassDescMiss" title="Texture quad descriptor misses" description="The number of quad-width filtering passes that miss in the resource or sampler descriptor cache." units="requests" counter="TEX_DFCH_NUM_PASSES_MISS" offset="37" />
|
||||
<event name="MaliTexQuadPassMip" title="Mipmapped texture quad issues" description="The number of quad-width filtering passes that use a mipmapped texture." units="issues" counter="TEX_DFCH_NUM_PASSES_MIP_MAP" offset="38" />
|
||||
<event name="MaliTexQuadPassTri" title="Trilinear filtered texture quad issues" description="The number of quad-width filtering passes that use a trilinear filter." units="issues" counter="TEX_TIDX_NUM_SPLIT_MIP_MAP" offset="39" />
|
||||
<event name="MaliTexQuads" title="Texture quads" description="The number of quad-width texture operations processed by the texture unit." units="quads" counter="TEX_MSGI_NUM_QUADS" offset="35" />
|
||||
<event name="MaliTexSample" title="Texture samples" description="The number of texture samples made." units="requests" equation="(TEX_MSGI_NUM_QUADS) * 4" />
|
||||
<event name="MaliTexTriInstrRate" title="Texture accesses using trilinear filter percentage" description="The percentage of texture operations using trilinear filtering." units="percent" equation="((TEX_TIDX_NUM_SPLIT_MIP_MAP) / (TEX_MSGI_NUM_QUADS)) * 100" />
|
||||
<event name="MaliTexUtil" title="Texture unit utilization" description="The percentage utilization of the texturing unit." units="percent" equation="(((TEX_FILT_NUM_OPERATIONS)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliVar16IssueCy" title="16-bit interpolation issue cycles" description="The number of 16-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_16) / (1)" />
|
||||
<event name="MaliVar16IssueSlot" title="16-bit interpolation slots" description="The number of 16-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_16" offset="51" />
|
||||
<event name="MaliVar32IssueCy" title="32-bit interpolation issue cycles" description="The number of 32-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_32) / (1)" />
|
||||
<event name="MaliVar32IssueSlot" title="32-bit interpolation slots" description="The number of 32-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_32" offset="50" />
|
||||
<event name="MaliVarInstr" title="Varying unit instructions" description="The number of warp-width interpolation operations processed by the varying unit." units="requests" counter="VARY_INSTR" offset="49" />
|
||||
<event name="MaliVarIssueCy" title="Varying unit issue cycles" description="The total number of varying unit issue cycles." units="cycles" equation="((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))" />
|
||||
<event name="MaliVarUtil" title="Varying unit utilization" description="The percentage utilization of the varying unit." units="percent" equation="((((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
</category>
|
||||
<category name="Tiler">
|
||||
<event name="MaliGeomBackFacePrim" title="Visible back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" counter="BACK_FACING" offset="10" />
|
||||
<event name="MaliGeomFaceXYPlaneCullPrim" title="Facing or XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" counter="PRIM_CULLED" offset="12" />
|
||||
<event name="MaliGeomFaceXYPlaneCullRate" title="Facing or XY plane test cull percentage" description="The percentage of primitives culled by the facing or frustum XY plane tests." units="percent" equation="((PRIM_CULLED) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomFrontFacePrim" title="Visible front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" counter="FRONT_FACING" offset="9" />
|
||||
<event name="MaliGeomLinePrim" title="Line primitives" description="The number of input line primitives." units="primitives" counter="LINES" offset="7" />
|
||||
<event name="MaliGeomPointPrim" title="Point primitives" description="The number of input point primitives." units="primitives" counter="POINTS" offset="8" />
|
||||
<event name="MaliGeomPosShadTask" title="Tiler position shading requests" description="The number of position shading requests in the tiler geometry flow." units="requests" counter="IDVS_POS_SHAD_REQ" offset="21" />
|
||||
<event name="MaliGeomPosShadThread" title="Position shader thread invocations" description="The number of position shader thread invocations." units="threads" equation="(IDVS_POS_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomPosShadThreadPerPrim" title="Position threads per input primitive" description="The number of position shader invocations per input primitive." units="threads" equation="((IDVS_POS_SHAD_REQ) * (4)) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))" />
|
||||
<event name="MaliGeomSampleCullPrim" title="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" counter="PRIM_SAT_CULLED" offset="14" />
|
||||
<event name="MaliGeomSampleCullRate" title="Sample test cull percentage" description="The percentage of primitives culled by the sample coverage test." units="percent" equation="((PRIM_SAT_CULLED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED) - (PRIM_CLIPPED))) * 100" />
|
||||
<event name="MaliGeomTotalCullPrim" title="Culled primitives" description="The number of primitives that were culled during the rendering process." units="primitives" equation="(PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)" />
|
||||
<event name="MaliGeomTotalPrim" title="Total input primitives" description="The total number of input primitives to the rendering process." units="primitives" equation="((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomTrianglePrim" title="Triangle primitives" description="The number of input triangle primitives." units="primitives" counter="TRIANGLES" offset="6" />
|
||||
<event name="MaliGeomVarShadTask" title="Tiler varying shading requests" description="The number of varying shading requests in the tiler geometry flow." units="requests" counter="IDVS_VAR_SHAD_REQ" offset="37" />
|
||||
<event name="MaliGeomVarShadThread" title="Varying shader thread invocations" description="The number of varying shader thread invocations." units="threads" equation="(IDVS_VAR_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomVarShadThreadPerPrim" title="Varying threads per input primitive" description="The number of varying shader invocations per visible primitive." units="threads" equation="((IDVS_VAR_SHAD_REQ) * (4)) / (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomVisiblePrim" title="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" counter="PRIM_VISIBLE" offset="11" />
|
||||
<event name="MaliGeomVisibleRate" title="Visible primitive percentage" description="The percentage of primitives that are visible after culling." units="percent" equation="((PRIM_VISIBLE) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomZPlaneCullPrim" title="Z plane culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" counter="PRIM_CLIPPED" offset="13" />
|
||||
<event name="MaliGeomZPlaneCullRate" title="Z plane test cull percentage" description="The percentage of primitives culled by the frustum Z plane test." units="percent" equation="((PRIM_CLIPPED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED))) * 100" />
|
||||
<event name="MaliTilerActiveCy" title="Tiler active cycles" description="The number of cycles when the tiler has a workload queued for processing." units="cycles" counter="TILER_ACTIVE" offset="4" />
|
||||
<event name="MaliTilerPosCacheHit" title="Position cache hit requests" description="The number of position lookups that result in a hit in the vertex cache." units="requests" counter="VCACHE_HIT" offset="26" />
|
||||
<event name="MaliTilerPosCacheHitRate" title="Position cache hit percentage" description="The percentage hit rate of the tiler position cache." units="percent" equation="((VCACHE_HIT) / ((VCACHE_HIT) + (VCACHE_MISS))) * 100" />
|
||||
<event name="MaliTilerPosCacheMiss" title="Position cache miss requests" description="The number of position lookups that miss in the vertex cache." units="requests" counter="VCACHE_MISS" offset="27" />
|
||||
<event name="MaliTilerPosShadFIFOFullCy" title="Tiler position FIFO full cycles" description="The number of cycles when the tiler has a stalled position shading buffer." units="cycles" counter="IDVS_POS_FIFO_FULL" offset="24" />
|
||||
<event name="MaliTilerPosShadStallCy" title="Tiler position shading stall cycles" description="The number of cycles when the tiler has a stalled position shading request." units="cycles" counter="IDVS_POS_SHAD_STALL" offset="23" />
|
||||
<event name="MaliTilerRdBt" title="Output internal read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" counter="BUS_READ" offset="17" />
|
||||
<event name="MaliTilerUtil" title="Tiler utilization" description="The percentage of GPU active cycles when the tiler has a workload queued for processing." units="percent" equation="((TILER_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliTilerVarCacheHit" title="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" counter="IDVS_VBU_HIT" offset="34" />
|
||||
<event name="MaliTilerVarCacheHitRate" title="Varying cache hit percentage" description="The percentage hit rate of the tiler varying cache." units="percent" equation="((IDVS_VBU_HIT) / ((IDVS_VBU_HIT) + (IDVS_VBU_MISS))) * 100" />
|
||||
<event name="MaliTilerVarCacheMiss" title="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" counter="IDVS_VBU_MISS" offset="35" />
|
||||
<event name="MaliTilerVarShadStallCy" title="Tiler varying shading stall cycles" description="The number of cycles when the tiler has a stalled varying shading request." units="cycles" counter="IDVS_VAR_SHAD_STALL" offset="38" />
|
||||
<event name="MaliTilerWrBt" title="Internal write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" counter="BUS_WRITE" offset="19" />
|
||||
</category>
|
||||
</metrics>
|
||||
251
src/panfrost/perf/generated/G52.xml
Normal file
251
src/panfrost/perf/generated/G52.xml
Normal file
|
|
@ -0,0 +1,251 @@
|
|||
<!--
|
||||
Copyright (c) 2026 Arm, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
Generated from libGPUCounters @ 4ea7c0127fe2942a00e4a1123bb62c625a401f93.
|
||||
https://github.com/ARM-software/libGPUCounters
|
||||
which is:
|
||||
Copyright (c) 2023-2025 Arm Limited
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
|
||||
<metrics id="G52">
|
||||
<category name="GPU Front-end">
|
||||
<event name="MaliFragQueueActiveCy" title="Fragment queue active cycles" description="The number of cycles when work is queued for processing in the GPU fragment queue." units="cycles" counter="JS0_ACTIVE" offset="10" />
|
||||
<event name="MaliFragQueueJob" title="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" counter="JS0_JOBS" offset="8" />
|
||||
<event name="MaliFragQueueTask" title="Fragment tasks" description="The number of fragment tasks processed." units="tasks" counter="JS0_TASKS" offset="9" />
|
||||
<event name="MaliFragQueueUtil" title="Fragment queue utilization" description="The fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS0_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragQueueWaitDepCy" title="Fragment queue job dependency wait cycles" description="The number of cycles when queued fragment work is waiting for dependent work to complete." units="cycles" counter="JS0_WAIT_DEPEND" offset="14" />
|
||||
<event name="MaliFragQueueWaitFinishCy" title="Fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued fragment work to complete." units="cycles" counter="JS0_WAIT_FINISH" offset="15" />
|
||||
<event name="MaliFragQueueWaitFlushCy" title="Fragment queue cache flush wait cycles" description="The number of cycles when queued fragment work is waiting for a cache flush." units="cycles" counter="JS0_WAIT_FLUSH" offset="11" />
|
||||
<event name="MaliFragQueueWaitIssueCy" title="Fragment queue job issue wait cycles" description="The number of cycles when queued fragment work is waiting for an available processor." units="cycles" counter="JS0_WAIT_ISSUE" offset="13" />
|
||||
<event name="MaliFragQueueWaitRdCy" title="Fragment queue job descriptor read wait cycles" description="The number of cycles when queued fragment work is waiting for a descriptor load." units="cycles" counter="JS0_WAIT_READ" offset="12" />
|
||||
<event name="MaliGPUActiveCy" title="GPU active cycles" description="The number of cycles when the GPU has a workload of any type queued for processing." units="cycles" counter="GPU_ACTIVE" offset="6" />
|
||||
<event name="MaliGPUIRQActiveCy" title="GPU interrupt pending cycles" description="The number of cycles when the GPU has a pending interrupt." units="cycles" counter="IRQ_ACTIVE" offset="7" />
|
||||
<event name="MaliGPUIRQUtil" title="Interrupt pending utilization" description="The IRQ pending utilization compared against the GPU active cycles." units="percent" equation="((IRQ_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueActiveCy" title="Non-fragment queue active cycles" description="The number of cycles when work is queued in the GPU non-fragment queue." units="cycles" counter="JS1_ACTIVE" offset="18" />
|
||||
<event name="MaliNonFragQueueJob" title="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" counter="JS1_JOBS" offset="16" />
|
||||
<event name="MaliNonFragQueueTask" title="Non-fragment tasks" description="The number of non-fragment tasks processed." units="tasks" counter="JS1_TASKS" offset="17" />
|
||||
<event name="MaliNonFragQueueUtil" title="Non-fragment queue utilization" description="The non-fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS1_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueWaitDepCy" title="Non-fragment queue job dependency wait cycles" description="The number of cycles when queued non-fragment work is waiting for dependent work to complete." units="cycles" counter="JS1_WAIT_DEPEND" offset="22" />
|
||||
<event name="MaliNonFragQueueWaitFinishCy" title="Non-fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued non-fragment work to complete." units="cycles" counter="JS1_WAIT_FINISH" offset="23" />
|
||||
<event name="MaliNonFragQueueWaitFlushCy" title="Non-fragment queue cache flush wait cycles" description="The number of cycles when queued non-fragment work is waiting for a cache flush." units="cycles" counter="JS1_WAIT_FLUSH" offset="19" />
|
||||
<event name="MaliNonFragQueueWaitIssueCy" title="Non-fragment queue job issue wait cycles" description="The number of cycles when queued non-fragment work is waiting for an available processor." units="cycles" counter="JS1_WAIT_ISSUE" offset="21" />
|
||||
<event name="MaliNonFragQueueWaitRdCy" title="Non-fragment queue job descriptor read wait cycles" description="The number of cycles when queued non-fragment work is waiting for a descriptor load." units="cycles" counter="JS1_WAIT_READ" offset="20" />
|
||||
<event name="MaliResQueueActiveCy" title="Reserved active cycles" description="The number of cycles when work is queued in the GPU reserved queue." units="cycles" counter="JS2_ACTIVE" offset="26" />
|
||||
<event name="MaliResQueueJob" title="Reserved queue jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" counter="JS2_JOBS" offset="24" />
|
||||
<event name="MaliResQueueTask" title="Reserved queue tasks" description="The number of reserved tasks processed." units="tasks" counter="JS2_TASKS" offset="25" />
|
||||
<event name="MaliResQueueWaitDepCy" title="Reserved queue job dependency wait cycles" description="The number of cycles when queued reserved work is waiting for dependent work to complete." units="cycles" counter="JS2_WAIT_DEPEND" offset="30" />
|
||||
<event name="MaliResQueueWaitFinishCy" title="Reserved queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued reserved work to complete." units="cycles" counter="JS2_WAIT_FINISH" offset="31" />
|
||||
<event name="MaliResQueueWaitFlushCy" title="Reserved queue cache flush wait cycles" description="The number of cycles when queued reserved work is waiting for a cache flush." units="cycles" counter="JS2_WAIT_FLUSH" offset="27" />
|
||||
<event name="MaliResQueueWaitIssueCy" title="Reserved queue job issue wait cycles" description="The number of cycles when queued reserved work is waiting for an available processor." units="cycles" counter="JS2_WAIT_ISSUE" offset="29" />
|
||||
<event name="MaliResQueueWaitRdCy" title="Reserved queue job descriptor read wait cycles" description="The number of cycles when queued reserved work is waiting for a descriptor load." units="cycles" counter="JS2_WAIT_READ" offset="28" />
|
||||
</category>
|
||||
<category name="Memory System">
|
||||
<event name="MaliExtBusRd" title="Output external read transactions" description="The number of external read transactions." units="transactions" counter="L2_EXT_READ" offset="29" />
|
||||
<event name="MaliExtBusRdBt" title="Output external read beats" description="The number of external bus data read cycles." units="beats" counter="L2_EXT_READ_BEATS" offset="32" />
|
||||
<event name="MaliExtBusRdBy" title="Output external read bytes" description="The total output read bandwidth for the GPU." units="bytes" equation="(L2_EXT_READ_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusRdLat0" title="Output external read latency 0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" counter="L2_EXT_RRESP_0_127" offset="37" />
|
||||
<event name="MaliExtBusRdLat128" title="Output external read latency 128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" counter="L2_EXT_RRESP_128_191" offset="38" />
|
||||
<event name="MaliExtBusRdLat192" title="Output external read latency 192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" counter="L2_EXT_RRESP_192_255" offset="39" />
|
||||
<event name="MaliExtBusRdLat256" title="Output external read latency 256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" counter="L2_EXT_RRESP_256_319" offset="40" />
|
||||
<event name="MaliExtBusRdLat320" title="Output external read latency 320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" counter="L2_EXT_RRESP_320_383" offset="41" />
|
||||
<event name="MaliExtBusRdLat384" title="Output external read latency 384+ cycles" description="The number of read beats that are returned at least 384 cycles after the transaction started." units="beats" equation="(L2_EXT_READ_BEATS) - (L2_EXT_RRESP_0_127) - (L2_EXT_RRESP_128_191) - (L2_EXT_RRESP_192_255) - (L2_EXT_RRESP_256_319) - (L2_EXT_RRESP_320_383)" />
|
||||
<event name="MaliExtBusRdNoSnoop" title="Output external ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" counter="L2_EXT_READ_NOSNP" offset="30" />
|
||||
<event name="MaliExtBusRdOTQ1" title="Output external outstanding reads 0-25%" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q1" offset="34" />
|
||||
<event name="MaliExtBusRdOTQ2" title="Output external outstanding reads 25-50%" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q2" offset="35" />
|
||||
<event name="MaliExtBusRdOTQ3" title="Output external outstanding reads 50-75%" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q3" offset="36" />
|
||||
<event name="MaliExtBusRdOTQ4" title="Output external outstanding reads 75-100%" description="The number of read transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_READ) - (L2_EXT_AR_CNT_Q1) - (L2_EXT_AR_CNT_Q2) - (L2_EXT_AR_CNT_Q3)" />
|
||||
<event name="MaliExtBusRdStallCy" title="Output external read stall cycles" description="The number of cycles when a read is stalled waiting for the external bus." units="cycles" counter="L2_EXT_AR_STALL" offset="33" />
|
||||
<event name="MaliExtBusRdStallRate" title="Output external read stall percentage" description="The percentage of cycles with an external read transaction stalled." units="percent" equation="((L2_EXT_AR_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliExtBusRdUnique" title="Output external ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" counter="L2_EXT_READ_UNIQUE" offset="31" />
|
||||
<event name="MaliExtBusWr" title="Output external write transactions" description="The number of external write transactions." units="transactions" counter="L2_EXT_WRITE" offset="42" />
|
||||
<event name="MaliExtBusWrBt" title="Output external write beats" description="The number of external bus data write cycles." units="beats" counter="L2_EXT_WRITE_BEATS" offset="47" />
|
||||
<event name="MaliExtBusWrBy" title="Output external write bytes" description="The total output write bandwidth for the GPU." units="bytes" equation="(L2_EXT_WRITE_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusWrNoSnoopFull" title="Output external WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_FULL" offset="43" />
|
||||
<event name="MaliExtBusWrNoSnoopPart" title="Output external WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_PTL" offset="44" />
|
||||
<event name="MaliExtBusWrOTQ1" title="Output external outstanding writes 0-25%" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q1" offset="49" />
|
||||
<event name="MaliExtBusWrOTQ2" title="Output external outstanding writes 25-50%" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q2" offset="50" />
|
||||
<event name="MaliExtBusWrOTQ3" title="Output external outstanding writes 50-75%" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q3" offset="51" />
|
||||
<event name="MaliExtBusWrOTQ4" title="Output external outstanding writes 75-100%" description="The number of write transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_WRITE) - (L2_EXT_AW_CNT_Q1) - (L2_EXT_AW_CNT_Q2) - (L2_EXT_AW_CNT_Q3)" />
|
||||
<event name="MaliExtBusWrSnoopFull" title="Output external WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_FULL" offset="45" />
|
||||
<event name="MaliExtBusWrSnoopPart" title="Output external WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_PTL" offset="46" />
|
||||
<event name="MaliExtBusWrStallCy" title="Output external write stall cycles" description="The number of cycles when a write is stalled waiting for the external bus." units="cycles" counter="L2_EXT_W_STALL" offset="48" />
|
||||
<event name="MaliExtBusWrStallRate" title="Output external write stall percentage" description="The percentage of cycles with an external write transaction stalled." units="percent" equation="((L2_EXT_W_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliL2CacheFlush" title="L2 cache flush requests" description="The number of GPU L2 cache flushes performed." units="requests" counter="CACHE_FLUSH" offset="63" />
|
||||
<event name="MaliL2CacheIncSnp" title="Input external snoop transactions" description="The number of coherency snoops triggered by external requesters." units="transactions" counter="L2_EXT_SNOOP" offset="52" />
|
||||
<event name="MaliL2CacheIncSnpStallCy" title="Input external snoop stall cycles" description="The number of cycles when a coherency snoop triggered by an external requester is stalled." units="cycles" counter="L2_EXT_SNOOP_STALL" offset="53" />
|
||||
<event name="MaliL2CacheL1Rd" title="Output internal read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal requester." units="requests" counter="L2_RD_MSG_OUT" offset="22" />
|
||||
<event name="MaliL2CacheL1RdStallCy" title="Output internal read stall cycles" description="The number of cycles when L1 cache read requests sent by the L2 cache to an internal requester are stalled." units="cycles" counter="L2_RD_MSG_OUT_STALL" offset="23" />
|
||||
<event name="MaliL2CacheL1Wr" title="Output internal write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal requester." units="requests" counter="L2_WR_MSG_OUT" offset="24" />
|
||||
<event name="MaliL2CacheLookup" title="Any lookup requests" description="The number of L2 cache lookups performed." units="requests" counter="L2_ANY_LOOKUP" offset="25" />
|
||||
<event name="MaliL2CacheRd" title="Input internal read requests" description="The number of L2 cache read requests from internal requesters." units="requests" counter="L2_RD_MSG_IN" offset="16" />
|
||||
<event name="MaliL2CacheRdLookup" title="Read lookup requests" description="The number of L2 cache read lookups performed." units="requests" counter="L2_READ_LOOKUP" offset="26" />
|
||||
<event name="MaliL2CacheRdMissRate" title="L2 cache read miss percentage" description="The percentage of internal L2 cache reads that result in an external read." units="percent" equation="((L2_EXT_READ) / (L2_READ_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheRdStallCy" title="Input internal read stall cycles" description="The number of cycles when L2 cache read requests from internal requesters are stalled." units="cycles" counter="L2_RD_MSG_IN_STALL" offset="17" />
|
||||
<event name="MaliL2CacheSnp" title="Input internal snoop requests" description="The number of L2 snoop requests from internal requesters." units="requests" counter="L2_SNP_MSG_IN" offset="20" />
|
||||
<event name="MaliL2CacheSnpLookup" title="Input external snoop lookup requests" description="The number of coherency snoop lookups performed that were triggered by an external requester." units="requests" counter="L2_EXT_SNOOP_LOOKUP" offset="28" />
|
||||
<event name="MaliL2CacheSnpStallCy" title="Input internal snoop stall cycles" description="The number of cycles when L2 cache snoop requests from internal requesters are stalled." units="cycles" counter="L2_SNP_MSG_IN_STALL" offset="21" />
|
||||
<event name="MaliL2CacheWr" title="Input internal write requests" description="The number of L2 cache write requests from internal requesters." units="requests" counter="L2_WR_MSG_IN" offset="18" />
|
||||
<event name="MaliL2CacheWrLookup" title="Write lookup requests" description="The number of L2 cache write lookups performed." units="requests" counter="L2_WRITE_LOOKUP" offset="27" />
|
||||
<event name="MaliL2CacheWrMissRate" title="L2 cache write miss percentage" description="The percentage of internal L2 cache writes that result in an external write." units="percent" equation="((L2_EXT_WRITE) / (L2_WRITE_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheWrStallCy" title="Input internal write stall cycles" description="The number of cycles when L2 cache write requests from internal requesters are stalled." units="cycles" counter="L2_WR_MSG_IN_STALL" offset="19" />
|
||||
<event name="MaliMMUL2Hit" title="MMU L2 lookup TLB hits" description="The number of level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L2" offset="8" />
|
||||
<event name="MaliMMUL2Rd" title="MMU L2 table read requests" description="The number of level 2 translation table reads." units="requests" counter="MMU_TABLE_READS_L2" offset="6" />
|
||||
<event name="MaliMMUL3Hit" title="MMU L3 lookup TLB hits" description="The number of level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L3" offset="7" />
|
||||
<event name="MaliMMUL3Rd" title="MMU L3 table read requests" description="The number of level 3 translation table reads." units="requests" counter="MMU_TABLE_READS_L3" offset="5" />
|
||||
<event name="MaliMMULookup" title="MMU lookup requests" description="The number of main MMU address translations performed." units="requests" counter="MMU_REQUESTS" offset="4" />
|
||||
<event name="MaliMMUS2L2Hit" title="MMU stage 2 L2 lookup TLB hits" description="The number of stage 2 level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L2" offset="13" />
|
||||
<event name="MaliMMUS2L2Rd" title="MMU stage 2 L2 lookup requests" description="The number of stage 2 level 2 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L2" offset="11" />
|
||||
<event name="MaliMMUS2L3Hit" title="MMU stage 2 L3 lookup TLB hits" description="The number of stage 2 level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L3" offset="12" />
|
||||
<event name="MaliMMUS2L3Rd" title="MMU stage 2 L3 lookup requests" description="The number of stage 2 level 3 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L3" offset="10" />
|
||||
<event name="MaliMMUS2Lookup" title="MMU stage 2 lookup requests" description="The number of main MMU stage 2 address translations performed." units="requests" counter="MMU_S2_REQUESTS" offset="9" />
|
||||
</category>
|
||||
<category name="Shader Core">
|
||||
<event name="MaliALUIssueCy" title="Arithmetic unit issue cycles" description="The number of cycles the arithmetic unit was busy." units="cycles" equation="(EXEC_INSTR_COUNT)" />
|
||||
<event name="MaliALUUtil" title="Arithmetic unit utilization" description="The percentage utilization of the arithmetic unit." units="percent" equation="(((EXEC_INSTR_COUNT)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliAttrInstr" title="Attribute instructions" description="The number of instructions run by the attribute unit." units="instructions" counter="ATTR_INSTR" offset="52" />
|
||||
<event name="MaliCoreActiveCy" title="Execution core active cycles" description="The number of cycles when the shader core is processing at least one warp." units="cycles" counter="EXEC_CORE_ACTIVE" offset="26" />
|
||||
<event name="MaliCoreAllRegsWarp" title="Warps using more than 32 registers" description="The number of warps that require more than 32 registers." units="warps" counter="WARP_REG_SIZE_64" offset="17" />
|
||||
<event name="MaliCoreAllRegsWarpRate" title="All registers warp percentage" description="The percentage of warps that require more than 32 registers." units="percent" equation="((WARP_REG_SIZE_64) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreFullWarp" title="Full warps" description="The number of warps that have a full thread slot allocation." units="warps" counter="FULL_QUAD_WARPS" offset="21" />
|
||||
<event name="MaliCoreFullWarpRate" title="Full warp percentage" description="The percentage of warps that have a full thread slot allocation." units="percent" equation="((FULL_QUAD_WARPS) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreUtil" title="Execution core utilization" description="The utilization of the programmable shader core." units="percent" equation="((EXEC_CORE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliEngDivergedInstr" title="Diverged instructions" description="The number of instructions run per warp that have control flow divergence." units="instructions" counter="EXEC_INSTR_DIVERGED" offset="29" />
|
||||
<event name="MaliEngDivergedInstrRate" title="Warp divergence percentage" description="The percentage of instructions that have control flow divergence across the warp." units="percent" equation="((EXEC_INSTR_DIVERGED) / (EXEC_INSTR_COUNT)) * 100" />
|
||||
<event name="MaliEngInstr" title="Arithmetic instruction issue cycles" description="The number of instructions run per warp." units="instructions" counter="EXEC_INSTR_COUNT" offset="28" />
|
||||
<event name="MaliEngStarveCy" title="Execution engine starvation cycles" description="The number of cycles when no new threads are available to run." units="cycles" counter="EXEC_INSTR_STARVING" offset="30" />
|
||||
<event name="MaliFragActiveCy" title="Fragment active cycles" description="The number of cycles when the shader core is processing a fragment workload." units="cycles" counter="FRAG_ACTIVE" offset="4" />
|
||||
<event name="MaliFragEZSKillQd" title="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_KILL" offset="14" />
|
||||
<event name="MaliFragEZSKillRate" title="Early ZS killed quad percentage" description="The percentage of rasterized quads that are killed by early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSTestQd" title="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_TEST" offset="12" />
|
||||
<event name="MaliFragEZSTestRate" title="Early ZS tested quad percentage" description="The percentage of rasterized quads that were subjected to early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSUpdateQd" title="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" counter="FRAG_QUADS_EZS_UPDATE" offset="13" />
|
||||
<event name="MaliFragEZSUpdateRate" title="Early ZS updated quad percentage" description="The percentage of rasterized quads that update the framebuffer during early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_UPDATE) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragFPKActiveCy" title="Fragment pre-pipe buffer active cycles" description="The number of cycles when at least one quad is present in the pre-pipe quad queue." units="cycles" counter="FRAG_FPK_ACTIVE" offset="7" />
|
||||
<event name="MaliFragFPKBUtil" title="Fragment pre-pipe buffer utilization" description="The percentage of cycles when at least one quad is buffered for fragment shading." units="percent" equation="((FRAG_FPK_ACTIVE) / (FRAG_ACTIVE)) * 100" />
|
||||
<event name="MaliFragFPKKillQd" title="FPK HSR killed quads" description="The number of quads that are killed by hidden surface removal." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (((FRAG_WARPS) * (8)) / 4)" />
|
||||
<event name="MaliFragFPKKillRate" title="FPK HSR killed quad percentage" description="The percentage of rasterized quads that are killed by hidden surface removal." units="percent" equation="(((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (((FRAG_WARPS) * (8)) / 4)) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSKillQd" title="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" counter="FRAG_LZS_KILL" offset="16" />
|
||||
<event name="MaliFragLZSKillRate" title="Late ZS killed quad percentage" description="The percentage of rasterized quads that are killed by late depth and stencil testing." units="percent" equation="((FRAG_LZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSTestQd" title="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" counter="FRAG_LZS_TEST" offset="15" />
|
||||
<event name="MaliFragLZSTestRate" title="Late ZS tested quad percentage" description="The percentage of rasterized quads that are tested by late depth and stencil testing." units="percent" equation="((FRAG_LZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragOpaqueQd" title="Occluding quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" counter="QUAD_FPK_KILLER" offset="20" />
|
||||
<event name="MaliFragOpaqueQdRate" title="Occluding quad percentage" description="The percentage of quads that are valid occluders for hidden surface removal." units="percent" equation="((QUAD_FPK_KILLER) / ((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL))) * 100" />
|
||||
<event name="MaliFragOverdraw" title="Fragments per pixel" description="The number of fragments shaded per output pixel." units="threads" equation="((FRAG_WARPS) * (8)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliFragPartWarp" title="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps" counter="FRAG_PARTIAL_WARPS" offset="10" />
|
||||
<event name="MaliFragPartWarpRate" title="Partial coverage percentage" description="The percentage of warps that contain samples with no coverage." units="percent" equation="((FRAG_PARTIAL_WARPS) / (FRAG_WARPS)) * 100" />
|
||||
<event name="MaliFragRastPrim" title="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" counter="FRAG_PRIM_RAST" offset="6" />
|
||||
<event name="MaliFragRastQd" title="Rasterized fine quads" description="The number of fine quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_RAST" offset="11" />
|
||||
<event name="MaliFragRdPrim" title="Fragment primitives loaded" description="The number of primitives loaded from the tile list by the fragment front-end." units="primitives" counter="FRAG_PRIMITIVES" offset="5" />
|
||||
<event name="MaliFragShadedQd" title="Shaded coarse quads" description="The number of 2x2 fragment quads that are fragment shaded." units="quads" equation="((FRAG_WARPS) * (8)) / 4" />
|
||||
<event name="MaliFragThread" title="Fragment threads" description="The number of fragment threads started." units="threads" equation="(FRAG_WARPS) * (8)" />
|
||||
<event name="MaliFragThroughputCy" title="Average cycles per fragment thread" description="The average number of shader core cycles per fragment thread." units="cycles" equation="(FRAG_ACTIVE) / ((FRAG_WARPS) * (8))" />
|
||||
<event name="MaliFragTile" title="Tiles" description="The number of tiles processed by the shader core." units="tiles" counter="FRAG_PTILES" offset="18" />
|
||||
<event name="MaliFragTileKill" title="Killed unchanged tiles" description="The number of tiles killed by transaction elimination." units="tiles" counter="FRAG_TRANS_ELIM" offset="19" />
|
||||
<event name="MaliFragTileKillRate" title="Unchanged tile kill percentage" description="The percentage of tiles that are killed by transaction elimination." units="percent" equation="((FRAG_TRANS_ELIM) / (FRAG_PTILES)) * 100" />
|
||||
<event name="MaliFragTransparentQd" title="Non-occluding quads" description="The number of quads that are not eligible to be a hidden surface removal occluder." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (QUAD_FPK_KILLER)" />
|
||||
<event name="MaliFragUtil" title="Fragment utilization" description="The utilization of the shader core fragment path." units="percent" equation="((FRAG_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragWarp" title="Fragment warps" description="The number of fragment warps created." units="warps" counter="FRAG_WARPS" offset="9" />
|
||||
<event name="MaliLSAtomic" title="Load/store unit atomic issues" description="The number of load/store atomic accesses." units="cycles" counter="LS_MEM_ATOMIC" offset="48" />
|
||||
<event name="MaliLSFullRd" title="Load/store unit full read issues" description="The number of full-width load/store cache reads." units="cycles" counter="LS_MEM_READ_FULL" offset="44" />
|
||||
<event name="MaliLSFullWr" title="Load/store unit full write issues" description="The number of full-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_FULL" offset="46" />
|
||||
<event name="MaliLSIssueCy" title="Load/store unit issue cycles" description="The total number of load/store issue cycles." units="cycles" equation="((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)" />
|
||||
<event name="MaliLSPartRd" title="Load/store unit partial read issues" description="The number of partial-width load/store cache reads." units="cycles" counter="LS_MEM_READ_SHORT" offset="45" />
|
||||
<event name="MaliLSPartWr" title="Load/store unit partial write issues" description="The number of partial-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_SHORT" offset="47" />
|
||||
<event name="MaliLSRdCy" title="Load/store unit read issues" description="The total number of load/store read cycles." units="cycles" equation="(LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)" />
|
||||
<event name="MaliLSUtil" title="Load/store unit utilization" description="The percentage utilization of the load/store unit." units="percent" equation="((((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliLSWrCy" title="Load/store unit write issues" description="The total number of load/store write cycles." units="cycles" equation="(LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)" />
|
||||
<event name="MaliNonFragActiveCy" title="Non-fragment active cycles" description="The number of cycles when the shader core is processing some non-fragment workload." units="cycles" counter="COMPUTE_ACTIVE" offset="22" />
|
||||
<event name="MaliNonFragTask" title="Non-fragment core tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" counter="COMPUTE_TASKS" offset="23" />
|
||||
<event name="MaliNonFragThread" title="Non-fragment threads" description="The number of non-fragment threads started." units="threads" equation="(COMPUTE_WARPS) * (8)" />
|
||||
<event name="MaliNonFragThroughputCy" title="Average cycles per non-fragment thread" description="The average number of shader core cycles per non-fragment thread." units="cycles" equation="(COMPUTE_ACTIVE) / ((COMPUTE_WARPS) * (8))" />
|
||||
<event name="MaliNonFragUtil" title="Non-fragment utilization" description="The utilization of the shader core non-fragment path." units="percent" equation="((COMPUTE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragWarp" title="Non-fragment warps" description="The number of non-fragment warps created." units="warps" counter="COMPUTE_WARPS" offset="24" />
|
||||
<event name="MaliSCBusFFEExtRdBt" title="Fragment front-end read beats from external memory" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_FTC_EXT" offset="55" />
|
||||
<event name="MaliSCBusFFEExtRdBy" title="Fragment front-end read bytes from external memory" description="The total number of bytes read from the external memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC_EXT) * (16)" />
|
||||
<event name="MaliSCBusFFEL2RdBt" title="Fragment front-end read beats from L2 cache" description="The number of read beats received by the fixed-function fragment front-end." units="beats" counter="BEATS_RD_FTC" offset="54" />
|
||||
<event name="MaliSCBusFFEL2RdBy" title="Fragment front-end read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdBt" title="Load/store unit read beats from external memory" description="The number of read beats received by the load/store unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_LSC_EXT" offset="57" />
|
||||
<event name="MaliSCBusLSExtRdBy" title="Load/store unit read bytes from external memory" description="The total number of bytes read from the external memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC_EXT) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdByPerRd" title="Load/store unit bytes read from external memory per access cycle" description="The average number of bytes read from the external memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC_EXT) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSL2RdBt" title="Load/store unit read beats from L2 cache" description="The number of read beats received by the load/store unit." units="beats" counter="BEATS_RD_LSC" offset="56" />
|
||||
<event name="MaliSCBusLSL2RdBy" title="Load/store unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSL2RdByPerRd" title="Load/store unit bytes read from L2 per access cycle" description="The average number of bytes read from the L2 memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSOtherWrBt" title="Load/store unit other write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of any reason other than write-back." units="beats" counter="BEATS_WR_LSC_OTHER" offset="63" />
|
||||
<event name="MaliSCBusLSWBWrBt" title="Load/store unit write-back write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of write-back." units="beats" counter="BEATS_WR_LSC_WB" offset="61" />
|
||||
<event name="MaliSCBusLSWrBt" title="Load/store unit write beats to L2 memory system" description="The number of write beats sent by the load/store unit." units="beats" equation="(BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)" />
|
||||
<event name="MaliSCBusLSWrBy" title="Load/store unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the load/store unit." units="bytes" equation="((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)" />
|
||||
<event name="MaliSCBusLSWrByPerWr" title="Load/store unit bytes written to L2 per access cycle" description="The average number of bytes written to the L2 memory system by the load/store unit per write cycle." units="bytes" equation="(((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)) / ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT))" />
|
||||
<event name="MaliSCBusOtherL2RdBt" title="Miscellaneous read beats from L2 cache" description="The number of read beats received by a unit that is not specifically identified." units="beats" counter="BEATS_RD_OTHER" offset="60" />
|
||||
<event name="MaliSCBusTexExtRdBt" title="Texture unit read beats from external memory" description="The number of read beats received by the texture unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_TEX_EXT" offset="59" />
|
||||
<event name="MaliSCBusTexExtRdBy" title="Texture unit read bytes from external memory" description="The total number of bytes read from the external memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX_EXT) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdByPerRd" title="Texture unit bytes read from external memory per texture cycle" description="The average number of bytes read from the external memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX_EXT) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTexL2RdBt" title="Texture unit read beats from L2 cache" description="The number of read beats received by the texture unit." units="beats" counter="BEATS_RD_TEX" offset="58" />
|
||||
<event name="MaliSCBusTexL2RdBy" title="Texture unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX) * (16)" />
|
||||
<event name="MaliSCBusTexL2RdByPerRd" title="Texture unit bytes read from L2 per texture cycle" description="The average number of bytes read from the L2 memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTileWrBPerPx" title="Tile unit bytes written to L2 per pixel" description="The average number of bytes written to the L2 memory system by the tile unit per output pixel." units="bytes" equation="((BEATS_WR_TIB) * (16)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliSCBusTileWrBt" title="Tile unit write beats to L2 memory system" description="The number of write beats sent by the tile write-back unit." units="beats" counter="BEATS_WR_TIB" offset="62" />
|
||||
<event name="MaliSCBusTileWrBy" title="Tile unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the tile write-back unit." units="bytes" equation="(BEATS_WR_TIB) * (16)" />
|
||||
<event name="MaliTexCPI" title="Texture filtering cycles per instruction" description="The average number of texture filtering cycles per instruction." units="cycles" equation="((TEX_FILT_NUM_OPERATIONS)) / ((TEX_MSGI_NUM_QUADS) * 4)" />
|
||||
<event name="MaliTexCacheCompressFetch" title="Compressed texture line fetch requests" description="The number of texture line fetches from the L2 cache that are block compressed textures." units="issues" counter="TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED" offset="41" />
|
||||
<event name="MaliTexCacheCompressFetchRate" title="Texture data fetches from compressed lines" description="The percentage of texture line fetches that are from block compressed textures." units="percent" equation="((TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED) / (TEX_TFCH_NUM_LINES_FETCHED)) * 100" />
|
||||
<event name="MaliTexCacheFetch" title="Texture line fetch requests" description="The number of texture line fetches from the L2 cache." units="issues" counter="TEX_TFCH_NUM_LINES_FETCHED" offset="40" />
|
||||
<event name="MaliTexCacheLookup" title="Texture cache lookup requests" description="The number of texture cache lookup cycles." units="requests" counter="TEX_TFCH_NUM_OPERATIONS" offset="42" />
|
||||
<event name="MaliTexCacheUtil" title="Texture unit cache utilization" description="The percentage utilization of the texturing unit cache lookup path." units="percent" equation="((TEX_TFCH_NUM_OPERATIONS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexFiltIssueCy" title="Texture filtering cycles" description="The number of texture filtering issue cycles." units="cycles" counter="TEX_FILT_NUM_OPERATIONS" offset="43" />
|
||||
<event name="MaliTexIssueCy" title="Texture unit issue cycles" description="The number of cycles the texture unit was busy." units="cycles" equation="(TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliTexMipInstrRate" title="Texture accesses using mipmapping percentage" description="The percentage of texture operations accessing mipmapped textures." units="percent" equation="((TEX_DFCH_NUM_PASSES_MIP_MAP) / (TEX_DFCH_NUM_PASSES)) * 100" />
|
||||
<event name="MaliTexQuadPass" title="Texture quad issues" description="The number of quad-width filtering passes." units="issues" counter="TEX_DFCH_NUM_PASSES" offset="36" />
|
||||
<event name="MaliTexQuadPassDescMiss" title="Texture quad descriptor misses" description="The number of quad-width filtering passes that miss in the resource or sampler descriptor cache." units="requests" counter="TEX_DFCH_NUM_PASSES_MISS" offset="37" />
|
||||
<event name="MaliTexQuadPassMip" title="Mipmapped texture quad issues" description="The number of quad-width filtering passes that use a mipmapped texture." units="issues" counter="TEX_DFCH_NUM_PASSES_MIP_MAP" offset="38" />
|
||||
<event name="MaliTexQuadPassTri" title="Trilinear filtered texture quad issues" description="The number of quad-width filtering passes that use a trilinear filter." units="issues" counter="TEX_TIDX_NUM_SPLIT_MIP_MAP" offset="39" />
|
||||
<event name="MaliTexQuads" title="Texture quads" description="The number of quad-width texture operations processed by the texture unit." units="quads" counter="TEX_MSGI_NUM_QUADS" offset="35" />
|
||||
<event name="MaliTexSample" title="Texture samples" description="The number of texture samples made." units="requests" equation="(TEX_MSGI_NUM_QUADS) * 4" />
|
||||
<event name="MaliTexTriInstrRate" title="Texture accesses using trilinear filter percentage" description="The percentage of texture operations using trilinear filtering." units="percent" equation="((TEX_TIDX_NUM_SPLIT_MIP_MAP) / (TEX_MSGI_NUM_QUADS)) * 100" />
|
||||
<event name="MaliTexUtil" title="Texture unit utilization" description="The percentage utilization of the texturing unit." units="percent" equation="(((TEX_FILT_NUM_OPERATIONS)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliVar16IssueCy" title="16-bit interpolation issue cycles" description="The number of 16-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_16) / (1)" />
|
||||
<event name="MaliVar16IssueSlot" title="16-bit interpolation slots" description="The number of 16-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_16" offset="51" />
|
||||
<event name="MaliVar32IssueCy" title="32-bit interpolation issue cycles" description="The number of 32-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_32) / (1)" />
|
||||
<event name="MaliVar32IssueSlot" title="32-bit interpolation slots" description="The number of 32-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_32" offset="50" />
|
||||
<event name="MaliVarInstr" title="Varying unit instructions" description="The number of warp-width interpolation operations processed by the varying unit." units="requests" counter="VARY_INSTR" offset="49" />
|
||||
<event name="MaliVarIssueCy" title="Varying unit issue cycles" description="The total number of varying unit issue cycles." units="cycles" equation="((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))" />
|
||||
<event name="MaliVarUtil" title="Varying unit utilization" description="The percentage utilization of the varying unit." units="percent" equation="((((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
</category>
|
||||
<category name="Tiler">
|
||||
<event name="MaliGeomBackFacePrim" title="Visible back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" counter="BACK_FACING" offset="10" />
|
||||
<event name="MaliGeomFaceXYPlaneCullPrim" title="Facing or XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" counter="PRIM_CULLED" offset="12" />
|
||||
<event name="MaliGeomFaceXYPlaneCullRate" title="Facing or XY plane test cull percentage" description="The percentage of primitives culled by the facing or frustum XY plane tests." units="percent" equation="((PRIM_CULLED) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomFrontFacePrim" title="Visible front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" counter="FRONT_FACING" offset="9" />
|
||||
<event name="MaliGeomLinePrim" title="Line primitives" description="The number of input line primitives." units="primitives" counter="LINES" offset="7" />
|
||||
<event name="MaliGeomPointPrim" title="Point primitives" description="The number of input point primitives." units="primitives" counter="POINTS" offset="8" />
|
||||
<event name="MaliGeomPosShadTask" title="Tiler position shading requests" description="The number of position shading requests in the tiler geometry flow." units="requests" counter="IDVS_POS_SHAD_REQ" offset="21" />
|
||||
<event name="MaliGeomPosShadThread" title="Position shader thread invocations" description="The number of position shader thread invocations." units="threads" equation="(IDVS_POS_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomPosShadThreadPerPrim" title="Position threads per input primitive" description="The number of position shader invocations per input primitive." units="threads" equation="((IDVS_POS_SHAD_REQ) * (4)) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))" />
|
||||
<event name="MaliGeomSampleCullPrim" title="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" counter="PRIM_SAT_CULLED" offset="14" />
|
||||
<event name="MaliGeomSampleCullRate" title="Sample test cull percentage" description="The percentage of primitives culled by the sample coverage test." units="percent" equation="((PRIM_SAT_CULLED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED) - (PRIM_CLIPPED))) * 100" />
|
||||
<event name="MaliGeomTotalCullPrim" title="Culled primitives" description="The number of primitives that were culled during the rendering process." units="primitives" equation="(PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)" />
|
||||
<event name="MaliGeomTotalPrim" title="Total input primitives" description="The total number of input primitives to the rendering process." units="primitives" equation="((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomTrianglePrim" title="Triangle primitives" description="The number of input triangle primitives." units="primitives" counter="TRIANGLES" offset="6" />
|
||||
<event name="MaliGeomVarShadTask" title="Tiler varying shading requests" description="The number of varying shading requests in the tiler geometry flow." units="requests" counter="IDVS_VAR_SHAD_REQ" offset="37" />
|
||||
<event name="MaliGeomVarShadThread" title="Varying shader thread invocations" description="The number of varying shader thread invocations." units="threads" equation="(IDVS_VAR_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomVarShadThreadPerPrim" title="Varying threads per input primitive" description="The number of varying shader invocations per visible primitive." units="threads" equation="((IDVS_VAR_SHAD_REQ) * (4)) / (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomVisiblePrim" title="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" counter="PRIM_VISIBLE" offset="11" />
|
||||
<event name="MaliGeomVisibleRate" title="Visible primitive percentage" description="The percentage of primitives that are visible after culling." units="percent" equation="((PRIM_VISIBLE) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomZPlaneCullPrim" title="Z plane culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" counter="PRIM_CLIPPED" offset="13" />
|
||||
<event name="MaliGeomZPlaneCullRate" title="Z plane test cull percentage" description="The percentage of primitives culled by the frustum Z plane test." units="percent" equation="((PRIM_CLIPPED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED))) * 100" />
|
||||
<event name="MaliTilerActiveCy" title="Tiler active cycles" description="The number of cycles when the tiler has a workload queued for processing." units="cycles" counter="TILER_ACTIVE" offset="4" />
|
||||
<event name="MaliTilerPosCacheHit" title="Position cache hit requests" description="The number of position lookups that result in a hit in the vertex cache." units="requests" counter="VCACHE_HIT" offset="26" />
|
||||
<event name="MaliTilerPosCacheHitRate" title="Position cache hit percentage" description="The percentage hit rate of the tiler position cache." units="percent" equation="((VCACHE_HIT) / ((VCACHE_HIT) + (VCACHE_MISS))) * 100" />
|
||||
<event name="MaliTilerPosCacheMiss" title="Position cache miss requests" description="The number of position lookups that miss in the vertex cache." units="requests" counter="VCACHE_MISS" offset="27" />
|
||||
<event name="MaliTilerPosShadFIFOFullCy" title="Tiler position FIFO full cycles" description="The number of cycles when the tiler has a stalled position shading buffer." units="cycles" counter="IDVS_POS_FIFO_FULL" offset="24" />
|
||||
<event name="MaliTilerPosShadStallCy" title="Tiler position shading stall cycles" description="The number of cycles when the tiler has a stalled position shading request." units="cycles" counter="IDVS_POS_SHAD_STALL" offset="23" />
|
||||
<event name="MaliTilerRdBt" title="Output internal read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" counter="BUS_READ" offset="17" />
|
||||
<event name="MaliTilerUtil" title="Tiler utilization" description="The percentage of GPU active cycles when the tiler has a workload queued for processing." units="percent" equation="((TILER_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliTilerVarCacheHit" title="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" counter="IDVS_VBU_HIT" offset="34" />
|
||||
<event name="MaliTilerVarCacheHitRate" title="Varying cache hit percentage" description="The percentage hit rate of the tiler varying cache." units="percent" equation="((IDVS_VBU_HIT) / ((IDVS_VBU_HIT) + (IDVS_VBU_MISS))) * 100" />
|
||||
<event name="MaliTilerVarCacheMiss" title="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" counter="IDVS_VBU_MISS" offset="35" />
|
||||
<event name="MaliTilerVarShadStallCy" title="Tiler varying shading stall cycles" description="The number of cycles when the tiler has a stalled varying shading request." units="cycles" counter="IDVS_VAR_SHAD_STALL" offset="38" />
|
||||
<event name="MaliTilerWrBt" title="Internal write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" counter="BUS_WRITE" offset="19" />
|
||||
</category>
|
||||
</metrics>
|
||||
247
src/panfrost/perf/generated/G71.xml
Normal file
247
src/panfrost/perf/generated/G71.xml
Normal file
|
|
@ -0,0 +1,247 @@
|
|||
<!--
|
||||
Copyright (c) 2026 Arm, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
Generated from libGPUCounters @ 4ea7c0127fe2942a00e4a1123bb62c625a401f93.
|
||||
https://github.com/ARM-software/libGPUCounters
|
||||
which is:
|
||||
Copyright (c) 2023-2025 Arm Limited
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
|
||||
<metrics id="G71">
|
||||
<category name="GPU Front-end">
|
||||
<event name="MaliFragQueueActiveCy" title="Fragment queue active cycles" description="The number of cycles when work is queued for processing in the GPU fragment queue." units="cycles" counter="JS0_ACTIVE" offset="10" />
|
||||
<event name="MaliFragQueueJob" title="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" counter="JS0_JOBS" offset="8" />
|
||||
<event name="MaliFragQueueTask" title="Fragment tasks" description="The number of fragment tasks processed." units="tasks" counter="JS0_TASKS" offset="9" />
|
||||
<event name="MaliFragQueueUtil" title="Fragment queue utilization" description="The fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS0_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragQueueWaitDepCy" title="Fragment queue job dependency wait cycles" description="The number of cycles when queued fragment work is waiting for dependent work to complete." units="cycles" counter="JS0_WAIT_DEPEND" offset="14" />
|
||||
<event name="MaliFragQueueWaitFinishCy" title="Fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued fragment work to complete." units="cycles" counter="JS0_WAIT_FINISH" offset="15" />
|
||||
<event name="MaliFragQueueWaitFlushCy" title="Fragment queue cache flush wait cycles" description="The number of cycles when queued fragment work is waiting for a cache flush." units="cycles" counter="JS0_WAIT_FLUSH" offset="11" />
|
||||
<event name="MaliFragQueueWaitIssueCy" title="Fragment queue job issue wait cycles" description="The number of cycles when queued fragment work is waiting for an available processor." units="cycles" counter="JS0_WAIT_ISSUE" offset="13" />
|
||||
<event name="MaliFragQueueWaitRdCy" title="Fragment queue job descriptor read wait cycles" description="The number of cycles when queued fragment work is waiting for a descriptor load." units="cycles" counter="JS0_WAIT_READ" offset="12" />
|
||||
<event name="MaliGPUActiveCy" title="GPU active cycles" description="The number of cycles when the GPU has a workload of any type queued for processing." units="cycles" counter="GPU_ACTIVE" offset="6" />
|
||||
<event name="MaliGPUIRQActiveCy" title="GPU interrupt pending cycles" description="The number of cycles when the GPU has a pending interrupt." units="cycles" counter="IRQ_ACTIVE" offset="7" />
|
||||
<event name="MaliGPUIRQUtil" title="Interrupt pending utilization" description="The IRQ pending utilization compared against the GPU active cycles." units="percent" equation="((IRQ_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueActiveCy" title="Non-fragment queue active cycles" description="The number of cycles when work is queued in the GPU non-fragment queue." units="cycles" counter="JS1_ACTIVE" offset="18" />
|
||||
<event name="MaliNonFragQueueJob" title="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" counter="JS1_JOBS" offset="16" />
|
||||
<event name="MaliNonFragQueueTask" title="Non-fragment tasks" description="The number of non-fragment tasks processed." units="tasks" counter="JS1_TASKS" offset="17" />
|
||||
<event name="MaliNonFragQueueUtil" title="Non-fragment queue utilization" description="The non-fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS1_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueWaitDepCy" title="Non-fragment queue job dependency wait cycles" description="The number of cycles when queued non-fragment work is waiting for dependent work to complete." units="cycles" counter="JS1_WAIT_DEPEND" offset="22" />
|
||||
<event name="MaliNonFragQueueWaitFinishCy" title="Non-fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued non-fragment work to complete." units="cycles" counter="JS1_WAIT_FINISH" offset="23" />
|
||||
<event name="MaliNonFragQueueWaitFlushCy" title="Non-fragment queue cache flush wait cycles" description="The number of cycles when queued non-fragment work is waiting for a cache flush." units="cycles" counter="JS1_WAIT_FLUSH" offset="19" />
|
||||
<event name="MaliNonFragQueueWaitIssueCy" title="Non-fragment queue job issue wait cycles" description="The number of cycles when queued non-fragment work is waiting for an available processor." units="cycles" counter="JS1_WAIT_ISSUE" offset="21" />
|
||||
<event name="MaliNonFragQueueWaitRdCy" title="Non-fragment queue job descriptor read wait cycles" description="The number of cycles when queued non-fragment work is waiting for a descriptor load." units="cycles" counter="JS1_WAIT_READ" offset="20" />
|
||||
<event name="MaliResQueueActiveCy" title="Reserved active cycles" description="The number of cycles when work is queued in the GPU reserved queue." units="cycles" counter="JS2_ACTIVE" offset="26" />
|
||||
<event name="MaliResQueueJob" title="Reserved queue jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" counter="JS2_JOBS" offset="24" />
|
||||
<event name="MaliResQueueTask" title="Reserved queue tasks" description="The number of reserved tasks processed." units="tasks" counter="JS2_TASKS" offset="25" />
|
||||
<event name="MaliResQueueWaitDepCy" title="Reserved queue job dependency wait cycles" description="The number of cycles when queued reserved work is waiting for dependent work to complete." units="cycles" counter="JS2_WAIT_DEPEND" offset="30" />
|
||||
<event name="MaliResQueueWaitFinishCy" title="Reserved queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued reserved work to complete." units="cycles" counter="JS2_WAIT_FINISH" offset="31" />
|
||||
<event name="MaliResQueueWaitFlushCy" title="Reserved queue cache flush wait cycles" description="The number of cycles when queued reserved work is waiting for a cache flush." units="cycles" counter="JS2_WAIT_FLUSH" offset="27" />
|
||||
<event name="MaliResQueueWaitIssueCy" title="Reserved queue job issue wait cycles" description="The number of cycles when queued reserved work is waiting for an available processor." units="cycles" counter="JS2_WAIT_ISSUE" offset="29" />
|
||||
<event name="MaliResQueueWaitRdCy" title="Reserved queue job descriptor read wait cycles" description="The number of cycles when queued reserved work is waiting for a descriptor load." units="cycles" counter="JS2_WAIT_READ" offset="28" />
|
||||
</category>
|
||||
<category name="Memory System">
|
||||
<event name="MaliExtBusRd" title="Output external read transactions" description="The number of external read transactions." units="transactions" counter="L2_EXT_READ" offset="29" />
|
||||
<event name="MaliExtBusRdBt" title="Output external read beats" description="The number of external bus data read cycles." units="beats" counter="L2_EXT_READ_BEATS" offset="32" />
|
||||
<event name="MaliExtBusRdBy" title="Output external read bytes" description="The total output read bandwidth for the GPU." units="bytes" equation="(L2_EXT_READ_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusRdLat0" title="Output external read latency 0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" counter="L2_EXT_RRESP_0_127" offset="37" />
|
||||
<event name="MaliExtBusRdLat128" title="Output external read latency 128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" counter="L2_EXT_RRESP_128_191" offset="38" />
|
||||
<event name="MaliExtBusRdLat192" title="Output external read latency 192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" counter="L2_EXT_RRESP_192_255" offset="39" />
|
||||
<event name="MaliExtBusRdLat256" title="Output external read latency 256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" counter="L2_EXT_RRESP_256_319" offset="40" />
|
||||
<event name="MaliExtBusRdLat320" title="Output external read latency 320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" counter="L2_EXT_RRESP_320_383" offset="41" />
|
||||
<event name="MaliExtBusRdLat384" title="Output external read latency 384+ cycles" description="The number of read beats that are returned at least 384 cycles after the transaction started." units="beats" equation="(L2_EXT_READ_BEATS) - (L2_EXT_RRESP_0_127) - (L2_EXT_RRESP_128_191) - (L2_EXT_RRESP_192_255) - (L2_EXT_RRESP_256_319) - (L2_EXT_RRESP_320_383)" />
|
||||
<event name="MaliExtBusRdNoSnoop" title="Output external ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" counter="L2_EXT_READ_NOSNP" offset="30" />
|
||||
<event name="MaliExtBusRdOTQ1" title="Output external outstanding reads 0-25%" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q1" offset="34" />
|
||||
<event name="MaliExtBusRdOTQ2" title="Output external outstanding reads 25-50%" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q2" offset="35" />
|
||||
<event name="MaliExtBusRdOTQ3" title="Output external outstanding reads 50-75%" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q3" offset="36" />
|
||||
<event name="MaliExtBusRdOTQ4" title="Output external outstanding reads 75-100%" description="The number of read transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_READ) - (L2_EXT_AR_CNT_Q1) - (L2_EXT_AR_CNT_Q2) - (L2_EXT_AR_CNT_Q3)" />
|
||||
<event name="MaliExtBusRdStallCy" title="Output external read stall cycles" description="The number of cycles when a read is stalled waiting for the external bus." units="cycles" counter="L2_EXT_AR_STALL" offset="33" />
|
||||
<event name="MaliExtBusRdStallRate" title="Output external read stall percentage" description="The percentage of cycles with an external read transaction stalled." units="percent" equation="((L2_EXT_AR_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliExtBusRdUnique" title="Output external ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" counter="L2_EXT_READ_UNIQUE" offset="31" />
|
||||
<event name="MaliExtBusWr" title="Output external write transactions" description="The number of external write transactions." units="transactions" counter="L2_EXT_WRITE" offset="42" />
|
||||
<event name="MaliExtBusWrBt" title="Output external write beats" description="The number of external bus data write cycles." units="beats" counter="L2_EXT_WRITE_BEATS" offset="47" />
|
||||
<event name="MaliExtBusWrBy" title="Output external write bytes" description="The total output write bandwidth for the GPU." units="bytes" equation="(L2_EXT_WRITE_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusWrNoSnoopFull" title="Output external WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_FULL" offset="43" />
|
||||
<event name="MaliExtBusWrNoSnoopPart" title="Output external WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_PTL" offset="44" />
|
||||
<event name="MaliExtBusWrOTQ1" title="Output external outstanding writes 0-25%" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q1" offset="49" />
|
||||
<event name="MaliExtBusWrOTQ2" title="Output external outstanding writes 25-50%" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q2" offset="50" />
|
||||
<event name="MaliExtBusWrOTQ3" title="Output external outstanding writes 50-75%" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q3" offset="51" />
|
||||
<event name="MaliExtBusWrOTQ4" title="Output external outstanding writes 75-100%" description="The number of write transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_WRITE) - (L2_EXT_AW_CNT_Q1) - (L2_EXT_AW_CNT_Q2) - (L2_EXT_AW_CNT_Q3)" />
|
||||
<event name="MaliExtBusWrSnoopFull" title="Output external WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_FULL" offset="45" />
|
||||
<event name="MaliExtBusWrSnoopPart" title="Output external WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_PTL" offset="46" />
|
||||
<event name="MaliExtBusWrStallCy" title="Output external write stall cycles" description="The number of cycles when a write is stalled waiting for the external bus." units="cycles" counter="L2_EXT_W_STALL" offset="48" />
|
||||
<event name="MaliExtBusWrStallRate" title="Output external write stall percentage" description="The percentage of cycles with an external write transaction stalled." units="percent" equation="((L2_EXT_W_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliL2CacheFlush" title="L2 cache flush requests" description="The number of GPU L2 cache flushes performed." units="requests" counter="CACHE_FLUSH" offset="63" />
|
||||
<event name="MaliL2CacheIncSnp" title="Input external snoop transactions" description="The number of coherency snoops triggered by external requesters." units="transactions" counter="L2_EXT_SNOOP" offset="52" />
|
||||
<event name="MaliL2CacheIncSnpStallCy" title="Input external snoop stall cycles" description="The number of cycles when a coherency snoop triggered by an external requester is stalled." units="cycles" counter="L2_EXT_SNOOP_STALL" offset="53" />
|
||||
<event name="MaliL2CacheL1Rd" title="Output internal read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal requester." units="requests" counter="L2_RD_MSG_OUT" offset="22" />
|
||||
<event name="MaliL2CacheL1RdStallCy" title="Output internal read stall cycles" description="The number of cycles when L1 cache read requests sent by the L2 cache to an internal requester are stalled." units="cycles" counter="L2_RD_MSG_OUT_STALL" offset="23" />
|
||||
<event name="MaliL2CacheL1Wr" title="Output internal write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal requester." units="requests" counter="L2_WR_MSG_OUT" offset="24" />
|
||||
<event name="MaliL2CacheLookup" title="Any lookup requests" description="The number of L2 cache lookups performed." units="requests" counter="L2_ANY_LOOKUP" offset="25" />
|
||||
<event name="MaliL2CacheRd" title="Input internal read requests" description="The number of L2 cache read requests from internal requesters." units="requests" counter="L2_RD_MSG_IN" offset="16" />
|
||||
<event name="MaliL2CacheRdLookup" title="Read lookup requests" description="The number of L2 cache read lookups performed." units="requests" counter="L2_READ_LOOKUP" offset="26" />
|
||||
<event name="MaliL2CacheRdMissRate" title="L2 cache read miss percentage" description="The percentage of internal L2 cache reads that result in an external read." units="percent" equation="((L2_EXT_READ) / (L2_READ_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheRdStallCy" title="Input internal read stall cycles" description="The number of cycles when L2 cache read requests from internal requesters are stalled." units="cycles" counter="L2_RD_MSG_IN_STALL" offset="17" />
|
||||
<event name="MaliL2CacheSnp" title="Input internal snoop requests" description="The number of L2 snoop requests from internal requesters." units="requests" counter="L2_SNP_MSG_IN" offset="20" />
|
||||
<event name="MaliL2CacheSnpLookup" title="Input external snoop lookup requests" description="The number of coherency snoop lookups performed that were triggered by an external requester." units="requests" counter="L2_EXT_SNOOP_LOOKUP" offset="28" />
|
||||
<event name="MaliL2CacheSnpStallCy" title="Input internal snoop stall cycles" description="The number of cycles when L2 cache snoop requests from internal requesters are stalled." units="cycles" counter="L2_SNP_MSG_IN_STALL" offset="21" />
|
||||
<event name="MaliL2CacheWr" title="Input internal write requests" description="The number of L2 cache write requests from internal requesters." units="requests" counter="L2_WR_MSG_IN" offset="18" />
|
||||
<event name="MaliL2CacheWrLookup" title="Write lookup requests" description="The number of L2 cache write lookups performed." units="requests" counter="L2_WRITE_LOOKUP" offset="27" />
|
||||
<event name="MaliL2CacheWrMissRate" title="L2 cache write miss percentage" description="The percentage of internal L2 cache writes that result in an external write." units="percent" equation="((L2_EXT_WRITE) / (L2_WRITE_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheWrStallCy" title="Input internal write stall cycles" description="The number of cycles when L2 cache write requests from internal requesters are stalled." units="cycles" counter="L2_WR_MSG_IN_STALL" offset="19" />
|
||||
<event name="MaliMMUL2Hit" title="MMU L2 lookup TLB hits" description="The number of level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L2" offset="8" />
|
||||
<event name="MaliMMUL2Rd" title="MMU L2 table read requests" description="The number of level 2 translation table reads." units="requests" counter="MMU_TABLE_READS_L2" offset="6" />
|
||||
<event name="MaliMMUL3Hit" title="MMU L3 lookup TLB hits" description="The number of level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L3" offset="7" />
|
||||
<event name="MaliMMUL3Rd" title="MMU L3 table read requests" description="The number of level 3 translation table reads." units="requests" counter="MMU_TABLE_READS_L3" offset="5" />
|
||||
<event name="MaliMMULookup" title="MMU lookup requests" description="The number of main MMU address translations performed." units="requests" counter="MMU_REQUESTS" offset="4" />
|
||||
<event name="MaliMMUS2L2Hit" title="MMU stage 2 L2 lookup TLB hits" description="The number of stage 2 level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L2" offset="13" />
|
||||
<event name="MaliMMUS2L2Rd" title="MMU stage 2 L2 lookup requests" description="The number of stage 2 level 2 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L2" offset="11" />
|
||||
<event name="MaliMMUS2L3Hit" title="MMU stage 2 L3 lookup TLB hits" description="The number of stage 2 level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L3" offset="12" />
|
||||
<event name="MaliMMUS2L3Rd" title="MMU stage 2 L3 lookup requests" description="The number of stage 2 level 3 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L3" offset="10" />
|
||||
<event name="MaliMMUS2Lookup" title="MMU stage 2 lookup requests" description="The number of main MMU stage 2 address translations performed." units="requests" counter="MMU_S2_REQUESTS" offset="9" />
|
||||
</category>
|
||||
<category name="Shader Core">
|
||||
<event name="MaliALUIssueCy" title="Arithmetic unit issue cycles" description="The number of cycles the arithmetic unit was busy." units="cycles" equation="(EXEC_INSTR_COUNT)" />
|
||||
<event name="MaliALUUtil" title="Arithmetic unit utilization" description="The percentage utilization of the arithmetic unit." units="percent" equation="(((EXEC_INSTR_COUNT)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliAttrInstr" title="Attribute instructions" description="The number of instructions run by the attribute unit." units="instructions" counter="ATTR_INSTR" offset="52" />
|
||||
<event name="MaliCoreActiveCy" title="Execution core active cycles" description="The number of cycles when the shader core is processing at least one warp." units="cycles" counter="EXEC_CORE_ACTIVE" offset="26" />
|
||||
<event name="MaliCoreUtil" title="Execution core utilization" description="The utilization of the programmable shader core." units="percent" equation="((EXEC_CORE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliEngDivergedInstr" title="Diverged instructions" description="The number of instructions run per warp that have control flow divergence." units="instructions" counter="EXEC_INSTR_DIVERGED" offset="29" />
|
||||
<event name="MaliEngDivergedInstrRate" title="Warp divergence percentage" description="The percentage of instructions that have control flow divergence across the warp." units="percent" equation="((EXEC_INSTR_DIVERGED) / (EXEC_INSTR_COUNT)) * 100" />
|
||||
<event name="MaliEngInstr" title="Arithmetic instruction issue cycles" description="The number of instructions run per warp." units="instructions" counter="EXEC_INSTR_COUNT" offset="28" />
|
||||
<event name="MaliEngStarveCy" title="Execution engine starvation cycles" description="The number of cycles when no new threads are available to run." units="cycles" counter="EXEC_INSTR_STARVING" offset="30" />
|
||||
<event name="MaliFragActiveCy" title="Fragment active cycles" description="The number of cycles when the shader core is processing a fragment workload." units="cycles" counter="FRAG_ACTIVE" offset="4" />
|
||||
<event name="MaliFragEZSKillQd" title="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_KILL" offset="14" />
|
||||
<event name="MaliFragEZSKillRate" title="Early ZS killed quad percentage" description="The percentage of rasterized quads that are killed by early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSTestQd" title="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_TEST" offset="12" />
|
||||
<event name="MaliFragEZSTestRate" title="Early ZS tested quad percentage" description="The percentage of rasterized quads that were subjected to early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSUpdateQd" title="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" counter="FRAG_QUADS_EZS_UPDATE" offset="13" />
|
||||
<event name="MaliFragEZSUpdateRate" title="Early ZS updated quad percentage" description="The percentage of rasterized quads that update the framebuffer during early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_UPDATE) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragFPKActiveCy" title="Fragment pre-pipe buffer active cycles" description="The number of cycles when at least one quad is present in the pre-pipe quad queue." units="cycles" counter="FRAG_FPK_ACTIVE" offset="7" />
|
||||
<event name="MaliFragFPKBUtil" title="Fragment pre-pipe buffer utilization" description="The percentage of cycles when at least one quad is buffered for fragment shading." units="percent" equation="((FRAG_FPK_ACTIVE) / (FRAG_ACTIVE)) * 100" />
|
||||
<event name="MaliFragFPKKillQd" title="FPK HSR killed quads" description="The number of quads that are killed by hidden surface removal." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - ((FRAG_WARPS))" />
|
||||
<event name="MaliFragFPKKillRate" title="FPK HSR killed quad percentage" description="The percentage of rasterized quads that are killed by hidden surface removal." units="percent" equation="(((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - ((FRAG_WARPS))) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSKillQd" title="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" counter="FRAG_LZS_KILL" offset="16" />
|
||||
<event name="MaliFragLZSKillRate" title="Late ZS killed quad percentage" description="The percentage of rasterized quads that are killed by late depth and stencil testing." units="percent" equation="((FRAG_LZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSTestQd" title="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" counter="FRAG_LZS_TEST" offset="15" />
|
||||
<event name="MaliFragLZSTestRate" title="Late ZS tested quad percentage" description="The percentage of rasterized quads that are tested by late depth and stencil testing." units="percent" equation="((FRAG_LZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragOpaqueQd" title="Occluding quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" counter="QUAD_FPK_KILLER" offset="20" />
|
||||
<event name="MaliFragOpaqueQdRate" title="Occluding quad percentage" description="The percentage of quads that are valid occluders for hidden surface removal." units="percent" equation="((QUAD_FPK_KILLER) / ((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL))) * 100" />
|
||||
<event name="MaliFragOverdraw" title="Fragments per pixel" description="The number of fragments shaded per output pixel." units="threads" equation="((FRAG_WARPS) * (4)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliFragPartWarp" title="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps" counter="FRAG_PARTIAL_WARPS" offset="10" />
|
||||
<event name="MaliFragPartWarpRate" title="Partial coverage percentage" description="The percentage of warps that contain samples with no coverage." units="percent" equation="((FRAG_PARTIAL_WARPS) / (FRAG_WARPS)) * 100" />
|
||||
<event name="MaliFragRastPrim" title="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" counter="FRAG_PRIM_RAST" offset="6" />
|
||||
<event name="MaliFragRastQd" title="Rasterized fine quads" description="The number of fine quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_RAST" offset="11" />
|
||||
<event name="MaliFragRdPrim" title="Fragment primitives loaded" description="The number of primitives loaded from the tile list by the fragment front-end." units="primitives" counter="FRAG_PRIMITIVES" offset="5" />
|
||||
<event name="MaliFragShadedQd" title="Shaded coarse quads" description="The number of 2x2 fragment quads that are fragment shaded." units="quads" equation="(FRAG_WARPS)" />
|
||||
<event name="MaliFragThread" title="Fragment threads" description="The number of fragment threads started." units="threads" equation="(FRAG_WARPS) * (4)" />
|
||||
<event name="MaliFragThroughputCy" title="Average cycles per fragment thread" description="The average number of shader core cycles per fragment thread." units="cycles" equation="(FRAG_ACTIVE) / ((FRAG_WARPS) * (4))" />
|
||||
<event name="MaliFragTile" title="Tiles" description="The number of tiles processed by the shader core." units="tiles" counter="FRAG_PTILES" offset="18" />
|
||||
<event name="MaliFragTileKill" title="Killed unchanged tiles" description="The number of tiles killed by transaction elimination." units="tiles" counter="FRAG_TRANS_ELIM" offset="19" />
|
||||
<event name="MaliFragTileKillRate" title="Unchanged tile kill percentage" description="The percentage of tiles that are killed by transaction elimination." units="percent" equation="((FRAG_TRANS_ELIM) / (FRAG_PTILES)) * 100" />
|
||||
<event name="MaliFragTransparentQd" title="Non-occluding quads" description="The number of quads that are not eligible to be a hidden surface removal occluder." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (QUAD_FPK_KILLER)" />
|
||||
<event name="MaliFragUtil" title="Fragment utilization" description="The utilization of the shader core fragment path." units="percent" equation="((FRAG_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragWarp" title="Fragment warps" description="The number of fragment warps created." units="warps" counter="FRAG_WARPS" offset="9" />
|
||||
<event name="MaliLSAtomic" title="Load/store unit atomic issues" description="The number of load/store atomic accesses." units="cycles" counter="LS_MEM_ATOMIC" offset="48" />
|
||||
<event name="MaliLSFullRd" title="Load/store unit full read issues" description="The number of full-width load/store cache reads." units="cycles" counter="LS_MEM_READ_FULL" offset="44" />
|
||||
<event name="MaliLSFullWr" title="Load/store unit full write issues" description="The number of full-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_FULL" offset="46" />
|
||||
<event name="MaliLSIssueCy" title="Load/store unit issue cycles" description="The total number of load/store issue cycles." units="cycles" equation="((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)" />
|
||||
<event name="MaliLSPartRd" title="Load/store unit partial read issues" description="The number of partial-width load/store cache reads." units="cycles" counter="LS_MEM_READ_SHORT" offset="45" />
|
||||
<event name="MaliLSPartWr" title="Load/store unit partial write issues" description="The number of partial-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_SHORT" offset="47" />
|
||||
<event name="MaliLSRdCy" title="Load/store unit read issues" description="The total number of load/store read cycles." units="cycles" equation="(LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)" />
|
||||
<event name="MaliLSUtil" title="Load/store unit utilization" description="The percentage utilization of the load/store unit." units="percent" equation="((((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliLSWrCy" title="Load/store unit write issues" description="The total number of load/store write cycles." units="cycles" equation="(LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)" />
|
||||
<event name="MaliNonFragActiveCy" title="Non-fragment active cycles" description="The number of cycles when the shader core is processing some non-fragment workload." units="cycles" counter="COMPUTE_ACTIVE" offset="22" />
|
||||
<event name="MaliNonFragTask" title="Non-fragment core tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" counter="COMPUTE_TASKS" offset="23" />
|
||||
<event name="MaliNonFragThread" title="Non-fragment threads" description="The number of non-fragment threads started." units="threads" equation="(COMPUTE_WARPS) * (4)" />
|
||||
<event name="MaliNonFragThroughputCy" title="Average cycles per non-fragment thread" description="The average number of shader core cycles per non-fragment thread." units="cycles" equation="(COMPUTE_ACTIVE) / ((COMPUTE_WARPS) * (4))" />
|
||||
<event name="MaliNonFragUtil" title="Non-fragment utilization" description="The utilization of the shader core non-fragment path." units="percent" equation="((COMPUTE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragWarp" title="Non-fragment warps" description="The number of non-fragment warps created." units="warps" counter="COMPUTE_WARPS" offset="24" />
|
||||
<event name="MaliSCBusFFEExtRdBt" title="Fragment front-end read beats from external memory" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_FTC_EXT" offset="55" />
|
||||
<event name="MaliSCBusFFEExtRdBy" title="Fragment front-end read bytes from external memory" description="The total number of bytes read from the external memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC_EXT) * (16)" />
|
||||
<event name="MaliSCBusFFEL2RdBt" title="Fragment front-end read beats from L2 cache" description="The number of read beats received by the fixed-function fragment front-end." units="beats" counter="BEATS_RD_FTC" offset="54" />
|
||||
<event name="MaliSCBusFFEL2RdBy" title="Fragment front-end read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdBt" title="Load/store unit read beats from external memory" description="The number of read beats received by the load/store unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_LSC_EXT" offset="57" />
|
||||
<event name="MaliSCBusLSExtRdBy" title="Load/store unit read bytes from external memory" description="The total number of bytes read from the external memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC_EXT) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdByPerRd" title="Load/store unit bytes read from external memory per access cycle" description="The average number of bytes read from the external memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC_EXT) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSL2RdBt" title="Load/store unit read beats from L2 cache" description="The number of read beats received by the load/store unit." units="beats" counter="BEATS_RD_LSC" offset="56" />
|
||||
<event name="MaliSCBusLSL2RdBy" title="Load/store unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSL2RdByPerRd" title="Load/store unit bytes read from L2 per access cycle" description="The average number of bytes read from the L2 memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSWrBt" title="Load/store unit write beats to L2 memory system" description="The number of write beats sent by the load/store unit." units="beats" counter="BEATS_WR_LSC" offset="61" />
|
||||
<event name="MaliSCBusLSWrBy" title="Load/store unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_WR_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSWrByPerWr" title="Load/store unit bytes written to L2 per access cycle" description="The average number of bytes written to the L2 memory system by the load/store unit per write cycle." units="bytes" equation="((BEATS_WR_LSC) * (16)) / ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT))" />
|
||||
<event name="MaliSCBusOtherL2RdBt" title="Miscellaneous read beats from L2 cache" description="The number of read beats received by a unit that is not specifically identified." units="beats" counter="BEATS_RD_OTHER" offset="60" />
|
||||
<event name="MaliSCBusOtherWrBt" title="Miscellaneous write beats to L2 memory system" description="The number of write beats sent by any unit that is not specifically identified." units="beats" counter="BEATS_WR_OTHER" offset="63" />
|
||||
<event name="MaliSCBusOtherWrBy" title="Other unit write bytes to L2 memory system" description="The number of write bytes sent by any unit that is not specifically identified." units="bytes" equation="(BEATS_WR_OTHER) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdBt" title="Texture unit read beats from external memory" description="The number of read beats received by the texture unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_TEX_EXT" offset="59" />
|
||||
<event name="MaliSCBusTexExtRdBy" title="Texture unit read bytes from external memory" description="The total number of bytes read from the external memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX_EXT) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdByPerRd" title="Texture unit bytes read from external memory per texture cycle" description="The average number of bytes read from the external memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX_EXT) * (16)) / (TEX_COORD_ISSUE)" />
|
||||
<event name="MaliSCBusTexL2RdBt" title="Texture unit read beats from L2 cache" description="The number of read beats received by the texture unit." units="beats" counter="BEATS_RD_TEX" offset="58" />
|
||||
<event name="MaliSCBusTexL2RdBy" title="Texture unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX) * (16)" />
|
||||
<event name="MaliSCBusTexL2RdByPerRd" title="Texture unit bytes read from L2 per texture cycle" description="The average number of bytes read from the L2 memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX) * (16)) / (TEX_COORD_ISSUE)" />
|
||||
<event name="MaliSCBusTileWrBPerPx" title="Tile unit bytes written to L2 per pixel" description="The average number of bytes written to the L2 memory system by the tile unit per output pixel." units="bytes" equation="((BEATS_WR_TIB) * (16)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliSCBusTileWrBt" title="Tile unit write beats to L2 memory system" description="The number of write beats sent by the tile write-back unit." units="beats" counter="BEATS_WR_TIB" offset="62" />
|
||||
<event name="MaliSCBusTileWrBy" title="Tile unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the tile write-back unit." units="bytes" equation="(BEATS_WR_TIB) * (16)" />
|
||||
<event name="MaliTex3DInstr" title="3D texture instructions" description="The number of texture operations acting on a 3D texture." units="requests" counter="TEX_INSTR_3D" offset="38" />
|
||||
<event name="MaliTex3DInstrRate" title="Texture samples using 3D texture percentage" description="The percentage of texture operations accessing 3D textures." units="percent" equation="((TEX_INSTR_3D) / (TEX_INSTR)) * 100" />
|
||||
<event name="MaliTexCPI" title="Texture filtering cycles per instruction" description="The average number of texture filtering cycles per instruction." units="cycles" equation="((TEX_COORD_ISSUE)) / ((TEX_INSTR))" />
|
||||
<event name="MaliTexCompressInstr" title="Compressed texture instructions" description="The number of texture operations acting on a compressed texture." units="requests" counter="TEX_INSTR_COMPRESSED" offset="37" />
|
||||
<event name="MaliTexCompressInstrRate" title="Texture samples using compressed texture percentage" description="The percentage of texture operations accessing compressed textures." units="percent" equation="((TEX_INSTR_COMPRESSED) / (TEX_INSTR)) * 100" />
|
||||
<event name="MaliTexCoordStallCy" title="Texture filtering coordinate stall cycles" description="The number of cycles when threads are stalled at the texel coordinate calculation stage." units="cycles" counter="TEX_COORD_STALL" offset="41" />
|
||||
<event name="MaliTexDataStallCy" title="Texture line fill stall cycles" description="The number of cycles when at least one thread is waiting for data from the texture cache, but no lookup is completed." units="cycles" counter="TEX_STARVE_CACHE" offset="42" />
|
||||
<event name="MaliTexFiltIssueCy" title="Texture filtering cycles" description="The number of texture filtering issue cycles." units="cycles" counter="TEX_COORD_ISSUE" offset="40" />
|
||||
<event name="MaliTexInstr" title="Texture instructions" description="The number of thread-width texture operations processed." units="requests" counter="TEX_INSTR" offset="35" />
|
||||
<event name="MaliTexIssueCy" title="Texture unit issue cycles" description="The number of cycles the texture unit was busy." units="cycles" equation="(TEX_COORD_ISSUE)" />
|
||||
<event name="MaliTexMipInstr" title="Mipmapped texture instructions" description="The number of texture operations that act on a mipmapped texture." units="requests" counter="TEX_INSTR_MIPMAP" offset="36" />
|
||||
<event name="MaliTexMipInstrRate" title="Texture accesses using mipmapping percentage" description="The percentage of texture operations accessing mipmapped textures." units="percent" equation="((TEX_INSTR_MIPMAP) / (TEX_INSTR)) * 100" />
|
||||
<event name="MaliTexPartDataStallCy" title="Texture filtering partial data stall cycles" description="The number of cycles when at least one thread fetched some data from the texture cache, but no filtering operation is started." units="cycles" counter="TEX_STARVE_FILTER" offset="43" />
|
||||
<event name="MaliTexSample" title="Texture samples" description="The number of texture samples made." units="requests" equation="(TEX_INSTR)" />
|
||||
<event name="MaliTexTriInstr" title="Trilinear filtered texture instructions" description="The number of texture operations using a trilinear texture filter." units="requests" counter="TEX_INSTR_TRILINEAR" offset="39" />
|
||||
<event name="MaliTexTriInstrRate" title="Texture accesses using trilinear filter percentage" description="The percentage of texture operations using trilinear filtering." units="percent" equation="((TEX_INSTR_TRILINEAR) / (TEX_INSTR)) * 100" />
|
||||
<event name="MaliTexUtil" title="Texture unit utilization" description="The percentage utilization of the texturing unit." units="percent" equation="(((TEX_COORD_ISSUE)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliVar16IssueCy" title="16-bit interpolation issue cycles" description="The number of 16-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_16) / (1)" />
|
||||
<event name="MaliVar16IssueSlot" title="16-bit interpolation slots" description="The number of 16-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_16" offset="51" />
|
||||
<event name="MaliVar32IssueCy" title="32-bit interpolation issue cycles" description="The number of 32-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_32) / (1)" />
|
||||
<event name="MaliVar32IssueSlot" title="32-bit interpolation slots" description="The number of 32-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_32" offset="50" />
|
||||
<event name="MaliVarInstr" title="Varying unit instructions" description="The number of warp-width interpolation operations processed by the varying unit." units="requests" counter="VARY_INSTR" offset="49" />
|
||||
<event name="MaliVarIssueCy" title="Varying unit issue cycles" description="The total number of varying unit issue cycles." units="cycles" equation="((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))" />
|
||||
<event name="MaliVarUtil" title="Varying unit utilization" description="The percentage utilization of the varying unit." units="percent" equation="((((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
</category>
|
||||
<category name="Tiler">
|
||||
<event name="MaliGeomBackFacePrim" title="Visible back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" counter="BACK_FACING" offset="10" />
|
||||
<event name="MaliGeomFaceXYPlaneCullPrim" title="Facing or XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" counter="PRIM_CULLED" offset="12" />
|
||||
<event name="MaliGeomFaceXYPlaneCullRate" title="Facing or XY plane test cull percentage" description="The percentage of primitives culled by the facing or frustum XY plane tests." units="percent" equation="((PRIM_CULLED) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomFrontFacePrim" title="Visible front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" counter="FRONT_FACING" offset="9" />
|
||||
<event name="MaliGeomLinePrim" title="Line primitives" description="The number of input line primitives." units="primitives" counter="LINES" offset="7" />
|
||||
<event name="MaliGeomPointPrim" title="Point primitives" description="The number of input point primitives." units="primitives" counter="POINTS" offset="8" />
|
||||
<event name="MaliGeomPosShadTask" title="Tiler position shading requests" description="The number of position shading requests in the tiler geometry flow." units="requests" counter="IDVS_POS_SHAD_REQ" offset="21" />
|
||||
<event name="MaliGeomPosShadThread" title="Position shader thread invocations" description="The number of position shader thread invocations." units="threads" equation="(IDVS_POS_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomPosShadThreadPerPrim" title="Position threads per input primitive" description="The number of position shader invocations per input primitive." units="threads" equation="((IDVS_POS_SHAD_REQ) * (4)) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))" />
|
||||
<event name="MaliGeomSampleCullPrim" title="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" counter="PRIM_SAT_CULLED" offset="14" />
|
||||
<event name="MaliGeomSampleCullRate" title="Sample test cull percentage" description="The percentage of primitives culled by the sample coverage test." units="percent" equation="((PRIM_SAT_CULLED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED) - (PRIM_CLIPPED))) * 100" />
|
||||
<event name="MaliGeomTotalCullPrim" title="Culled primitives" description="The number of primitives that were culled during the rendering process." units="primitives" equation="(PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)" />
|
||||
<event name="MaliGeomTotalPrim" title="Total input primitives" description="The total number of input primitives to the rendering process." units="primitives" equation="((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomTrianglePrim" title="Triangle primitives" description="The number of input triangle primitives." units="primitives" counter="TRIANGLES" offset="6" />
|
||||
<event name="MaliGeomVarShadTask" title="Tiler varying shading requests" description="The number of varying shading requests in the tiler geometry flow." units="requests" counter="IDVS_VAR_SHAD_REQ" offset="37" />
|
||||
<event name="MaliGeomVarShadThread" title="Varying shader thread invocations" description="The number of varying shader thread invocations." units="threads" equation="(IDVS_VAR_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomVarShadThreadPerPrim" title="Varying threads per input primitive" description="The number of varying shader invocations per visible primitive." units="threads" equation="((IDVS_VAR_SHAD_REQ) * (4)) / (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomVisiblePrim" title="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" counter="PRIM_VISIBLE" offset="11" />
|
||||
<event name="MaliGeomVisibleRate" title="Visible primitive percentage" description="The percentage of primitives that are visible after culling." units="percent" equation="((PRIM_VISIBLE) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomZPlaneCullPrim" title="Z plane culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" counter="PRIM_CLIPPED" offset="13" />
|
||||
<event name="MaliGeomZPlaneCullRate" title="Z plane test cull percentage" description="The percentage of primitives culled by the frustum Z plane test." units="percent" equation="((PRIM_CLIPPED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED))) * 100" />
|
||||
<event name="MaliTilerActiveCy" title="Tiler active cycles" description="The number of cycles when the tiler has a workload queued for processing." units="cycles" counter="TILER_ACTIVE" offset="4" />
|
||||
<event name="MaliTilerPosCacheHit" title="Position cache hit requests" description="The number of position lookups that result in a hit in the vertex cache." units="requests" counter="VCACHE_HIT" offset="26" />
|
||||
<event name="MaliTilerPosCacheHitRate" title="Position cache hit percentage" description="The percentage hit rate of the tiler position cache." units="percent" equation="((VCACHE_HIT) / ((VCACHE_HIT) + (VCACHE_MISS))) * 100" />
|
||||
<event name="MaliTilerPosCacheMiss" title="Position cache miss requests" description="The number of position lookups that miss in the vertex cache." units="requests" counter="VCACHE_MISS" offset="27" />
|
||||
<event name="MaliTilerPosShadFIFOFullCy" title="Tiler position FIFO full cycles" description="The number of cycles when the tiler has a stalled position shading buffer." units="cycles" counter="IDVS_POS_FIFO_FULL" offset="24" />
|
||||
<event name="MaliTilerPosShadStallCy" title="Tiler position shading stall cycles" description="The number of cycles when the tiler has a stalled position shading request." units="cycles" counter="IDVS_POS_SHAD_STALL" offset="23" />
|
||||
<event name="MaliTilerRdBt" title="Output internal read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" counter="BUS_READ" offset="17" />
|
||||
<event name="MaliTilerUtil" title="Tiler utilization" description="The percentage of GPU active cycles when the tiler has a workload queued for processing." units="percent" equation="((TILER_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliTilerVarCacheHit" title="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" counter="IDVS_VBU_HIT" offset="34" />
|
||||
<event name="MaliTilerVarCacheHitRate" title="Varying cache hit percentage" description="The percentage hit rate of the tiler varying cache." units="percent" equation="((IDVS_VBU_HIT) / ((IDVS_VBU_HIT) + (IDVS_VBU_MISS))) * 100" />
|
||||
<event name="MaliTilerVarCacheMiss" title="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" counter="IDVS_VBU_MISS" offset="35" />
|
||||
<event name="MaliTilerVarShadStallCy" title="Tiler varying shading stall cycles" description="The number of cycles when the tiler has a stalled varying shading request." units="cycles" counter="IDVS_VAR_SHAD_STALL" offset="38" />
|
||||
<event name="MaliTilerWrBt" title="Internal write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" counter="BUS_WRITE" offset="19" />
|
||||
</category>
|
||||
</metrics>
|
||||
272
src/panfrost/perf/generated/G710.xml
Normal file
272
src/panfrost/perf/generated/G710.xml
Normal file
|
|
@ -0,0 +1,272 @@
|
|||
<!--
|
||||
Copyright (c) 2026 Arm, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
Generated from libGPUCounters @ 4ea7c0127fe2942a00e4a1123bb62c625a401f93.
|
||||
https://github.com/ARM-software/libGPUCounters
|
||||
which is:
|
||||
Copyright (c) 2023-2025 Arm Limited
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
|
||||
<metrics id="G710">
|
||||
<category name="GPU Front-end">
|
||||
<event name="MaliCS0WaitStallCy" title="Command stream 0 wait stall cycles" description="The number of cycles that command stream interface 0 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF0_WAIT_BLOCKED" offset="51" />
|
||||
<event name="MaliCS1WaitStallCy" title="Command stream 1 wait stall cycles" description="The number of cycles that command stream interface 1 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF1_WAIT_BLOCKED" offset="55" />
|
||||
<event name="MaliCS2WaitStallCy" title="Command stream 2 wait stall cycles" description="The number of cycles that command stream interface 2 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF2_WAIT_BLOCKED" offset="59" />
|
||||
<event name="MaliCS3WaitStallCy" title="Command stream 3 wait stall cycles" description="The number of cycles that command stream interface 3 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF3_WAIT_BLOCKED" offset="63" />
|
||||
<event name="MaliCSFCEUActiveCy" title="Command execution unit active cycles" description="The number of cycles that the CEU is processing commands." units="cycles" counter="CEU_ACTIVE" offset="40" />
|
||||
<event name="MaliCSFCEUUtil" title="Command execution unit utilization" description="The CSF command execution unit utilization compared against the GPU active cycles." units="percent" equation="((CEU_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCSFCS0ActiveCy" title="Command stream 0 active cycles" description="The number of cycles that command stream interface 0 contained an enabled command stream." units="cycles" counter="CSHWIF0_ENABLED" offset="48" />
|
||||
<event name="MaliCSFCS1ActiveCy" title="Command stream 1 active cycles" description="The number of cycles that command stream interface 1 contained an enabled command stream." units="cycles" counter="CSHWIF1_ENABLED" offset="52" />
|
||||
<event name="MaliCSFCS2ActiveCy" title="Command stream 2 active cycles" description="The number of cycles that command stream interface 2 contained an enabled command stream." units="cycles" counter="CSHWIF2_ENABLED" offset="56" />
|
||||
<event name="MaliCSFCS3ActiveCy" title="Command stream 3 active cycles" description="The number of cycles that command stream interface 3 contained an enabled command stream." units="cycles" counter="CSHWIF3_ENABLED" offset="60" />
|
||||
<event name="MaliCSFLSUActiveCy" title="Command load/store unit active cycles" description="The number of cycles that the load-store unit is processing commands." units="cycles" counter="LSU_ACTIVE" offset="45" />
|
||||
<event name="MaliCSFLSUUtil" title="Command load/store unit utilization" description="The CSF load/store unit utilization compared against the GPU active cycles." units="percent" equation="((LSU_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCSFMCUActiveCy" title="MCU active cycles" description="The number of cycles when the CSF front-end MCU is actively processing." units="cycles" counter="MCU_ACTIVE" offset="5" />
|
||||
<event name="MaliCSFMCUUtil" title="Microcontroller utilization" description="The CSF MCU utilization compared against the GPU active cycles." units="percent" equation="((MCU_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCompQueueActiveCy" title="Compute queue active cycles" description="The number of cycles that the compute queue is processing work." units="cycles" equation="(ITER_COMP_ACTIVE) - (ITER_COMP_READY_BLOCKED)" />
|
||||
<event name="MaliCompQueueAssignStallCy" title="Compute queue endpoint stall cycles" description="The number of cycles the compute queue is waiting for endpoints to be assigned." units="cycles" counter="ITER_COMP_READY_BLOCKED" offset="30" />
|
||||
<event name="MaliCompQueueDrainStallCy" title="Compute queue endpoint drain stall cycles" description="The number of cycles the compute queue is waiting for endpoints to drain." units="cycles" counter="ITER_COMP_EP_DRAIN" offset="31" />
|
||||
<event name="MaliCompQueueIRQActiveCy" title="Compute queue interrupt pending cycles" description="The number of cycles that the compute queue IRQ was pending." units="cycles" counter="ITER_COMP_IRQ_ACTIVE" offset="28" />
|
||||
<event name="MaliCompQueueJob" title="Compute jobs" description="The number of compute jobs processed." units="jobs" counter="ITER_COMP_JOB_COMPLETED" offset="25" />
|
||||
<event name="MaliCompQueueTask" title="Compute tasks" description="The number of compute tasks processed." units="tasks" counter="ITER_COMP_TASK_COMPLETED" offset="26" />
|
||||
<event name="MaliCompQueueUtil" title="Compute queue utilization" description="The compute queue utilization compared against the GPU active cycles." units="percent" equation="(((ITER_COMP_ACTIVE) - (ITER_COMP_READY_BLOCKED)) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCompQueuedCy" title="Compute work queued cycles" description="The number of cycles that the compute queue has work queued." units="cycles" counter="ITER_COMP_ACTIVE" offset="24" />
|
||||
<event name="MaliFragQueueActiveCy" title="Fragment queue active cycles" description="The number of cycles that the fragment queue is processing work." units="cycles" equation="(ITER_FRAG_ACTIVE) - (ITER_FRAG_READY_BLOCKED)" />
|
||||
<event name="MaliFragQueueAssignStallCy" title="Fragment queue endpoint stall cycles" description="The number of cycles the fragment queue is waiting for endpoints to be assigned." units="cycles" counter="ITER_FRAG_READY_BLOCKED" offset="38" />
|
||||
<event name="MaliFragQueueIRQActiveCy" title="Fragment queue interrupt pending cycles" description="The number of cycles that the fragment queue IRQ was pending." units="cycles" counter="ITER_FRAG_IRQ_ACTIVE" offset="36" />
|
||||
<event name="MaliFragQueueJob" title="Fragment jobs" description="The number of fragment jobs processed." units="jobs" counter="ITER_FRAG_JOB_COMPLETED" offset="33" />
|
||||
<event name="MaliFragQueueTask" title="Fragment tasks" description="The number of fragment tasks processed." units="tasks" counter="ITER_FRAG_TASK_COMPLETED" offset="34" />
|
||||
<event name="MaliFragQueueUtil" title="Fragment queue utilization" description="The fragment queue utilization compared against the GPU active cycles." units="percent" equation="(((ITER_FRAG_ACTIVE) - (ITER_FRAG_READY_BLOCKED)) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragQueuedCy" title="Fragment work queued cycles" description="The number of cycles that the fragment queue has work queued." units="cycles" counter="ITER_FRAG_ACTIVE" offset="32" />
|
||||
<event name="MaliGPUActiveCy" title="GPU active cycles" description="The number of cycles when the GPU has a workload of any type queued for processing." units="cycles" counter="GPU_ACTIVE" offset="4" />
|
||||
<event name="MaliGPUAnyQueueActiveCy" title="Any queue active cycles" description="The number of cycles when any GPU queue is active." units="cycles" counter="GPU_ITER_ACTIVE" offset="6" />
|
||||
<event name="MaliGPUIRQ" title="GPU interrupts" description="The number of interrupts raised by the GPU to the CPU." units="interrupts" counter="GPU_IRQ_COUNT" offset="11" />
|
||||
<event name="MaliGPUIRQActiveCy" title="GPU interrupt pending cycles" description="The number of cycles when the GPU has a pending interrupt." units="cycles" counter="IRQ_ACTIVE" offset="10" />
|
||||
<event name="MaliGPUIRQUtil" title="Interrupt pending utilization" description="The IRQ pending utilization compared against the GPU active cycles." units="percent" equation="((IRQ_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliTilerQueueDrainStallCy" title="Vertex queue endpoint drain stall cycles" description="The number of cycles the vertex queue is waiting for endpoints to drain." units="cycles" counter="ITER_TILER_EP_DRAIN" offset="23" />
|
||||
<event name="MaliVertQueueActiveCy" title="Vertex queue active cycles" description="The number of cycles that the vertex queue is processing work." units="cycles" equation="(ITER_TILER_ACTIVE) - (ITER_TILER_READY_BLOCKED)" />
|
||||
<event name="MaliVertQueueAssignStallCy" title="Vertex queue endpoint stall cycles" description="The number of cycles the vertex queue is waiting for endpoints to be assigned." units="cycles" counter="ITER_TILER_READY_BLOCKED" offset="22" />
|
||||
<event name="MaliVertQueueIRQActiveCy" title="Vertex queue interrupt pending cycles" description="The number of cycles that the vertex queue IRQ was pending." units="cycles" counter="ITER_TILER_IRQ_ACTIVE" offset="20" />
|
||||
<event name="MaliVertQueueJob" title="Vertex jobs" description="The number of vertex jobs processed." units="jobs" counter="ITER_TILER_JOB_COMPLETED" offset="17" />
|
||||
<event name="MaliVertQueueTask" title="Vertex tasks" description="The number of vertex tasks processed." units="tasks" counter="ITER_TILER_IDVS_TASK_COMPLETED" offset="18" />
|
||||
<event name="MaliVertQueueUtil" title="Vertex queue utilization" description="The vertex queue utilization compared against the GPU active cycles." units="percent" equation="(((ITER_TILER_ACTIVE) - (ITER_TILER_READY_BLOCKED)) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliVertQueuedCy" title="Vertex work queued cycles" description="The number of cycles that the vertex shading queue has work queued." units="cycles" counter="ITER_TILER_ACTIVE" offset="16" />
|
||||
</category>
|
||||
<category name="Memory System">
|
||||
<event name="MaliExtBusRd" title="Output external read transactions" description="The number of external read transactions." units="transactions" counter="L2_EXT_READ" offset="29" />
|
||||
<event name="MaliExtBusRdBt" title="Output external read beats" description="The number of external bus data read cycles." units="beats" counter="L2_EXT_READ_BEATS" offset="32" />
|
||||
<event name="MaliExtBusRdBy" title="Output external read bytes" description="The total output read bandwidth for the GPU." units="bytes" equation="(L2_EXT_READ_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusRdLat0" title="Output external read latency 0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" counter="L2_EXT_RRESP_0_127" offset="37" />
|
||||
<event name="MaliExtBusRdLat128" title="Output external read latency 128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" counter="L2_EXT_RRESP_128_191" offset="38" />
|
||||
<event name="MaliExtBusRdLat192" title="Output external read latency 192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" counter="L2_EXT_RRESP_192_255" offset="39" />
|
||||
<event name="MaliExtBusRdLat256" title="Output external read latency 256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" counter="L2_EXT_RRESP_256_319" offset="40" />
|
||||
<event name="MaliExtBusRdLat320" title="Output external read latency 320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" counter="L2_EXT_RRESP_320_383" offset="41" />
|
||||
<event name="MaliExtBusRdLat384" title="Output external read latency 384+ cycles" description="The number of read beats that are returned at least 384 cycles after the transaction started." units="beats" equation="(L2_EXT_READ_BEATS) - (L2_EXT_RRESP_0_127) - (L2_EXT_RRESP_128_191) - (L2_EXT_RRESP_192_255) - (L2_EXT_RRESP_256_319) - (L2_EXT_RRESP_320_383)" />
|
||||
<event name="MaliExtBusRdNoSnoop" title="Output external ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" counter="L2_EXT_READ_NOSNP" offset="30" />
|
||||
<event name="MaliExtBusRdOTQ1" title="Output external outstanding reads 0-25%" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q1" offset="34" />
|
||||
<event name="MaliExtBusRdOTQ2" title="Output external outstanding reads 25-50%" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q2" offset="35" />
|
||||
<event name="MaliExtBusRdOTQ3" title="Output external outstanding reads 50-75%" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q3" offset="36" />
|
||||
<event name="MaliExtBusRdOTQ4" title="Output external outstanding reads 75-100%" description="The number of read transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_READ) - (L2_EXT_AR_CNT_Q1) - (L2_EXT_AR_CNT_Q2) - (L2_EXT_AR_CNT_Q3)" />
|
||||
<event name="MaliExtBusRdStallCy" title="Output external read stall cycles" description="The number of cycles when a read is stalled waiting for the external bus." units="cycles" counter="L2_EXT_AR_STALL" offset="33" />
|
||||
<event name="MaliExtBusRdStallRate" title="Output external read stall percentage" description="The percentage of cycles with an external read transaction stalled." units="percent" equation="((L2_EXT_AR_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliExtBusRdUnique" title="Output external ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" counter="L2_EXT_READ_UNIQUE" offset="31" />
|
||||
<event name="MaliExtBusWr" title="Output external write transactions" description="The number of external write transactions." units="transactions" counter="L2_EXT_WRITE" offset="42" />
|
||||
<event name="MaliExtBusWrBt" title="Output external write beats" description="The number of external bus data write cycles." units="beats" counter="L2_EXT_WRITE_BEATS" offset="47" />
|
||||
<event name="MaliExtBusWrBy" title="Output external write bytes" description="The total output write bandwidth for the GPU." units="bytes" equation="(L2_EXT_WRITE_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusWrNoSnoopFull" title="Output external WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_FULL" offset="43" />
|
||||
<event name="MaliExtBusWrNoSnoopPart" title="Output external WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_PTL" offset="44" />
|
||||
<event name="MaliExtBusWrOTQ1" title="Output external outstanding writes 0-25%" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q1" offset="49" />
|
||||
<event name="MaliExtBusWrOTQ2" title="Output external outstanding writes 25-50%" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q2" offset="50" />
|
||||
<event name="MaliExtBusWrOTQ3" title="Output external outstanding writes 50-75%" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q3" offset="51" />
|
||||
<event name="MaliExtBusWrOTQ4" title="Output external outstanding writes 75-100%" description="The number of write transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_WRITE) - (L2_EXT_AW_CNT_Q1) - (L2_EXT_AW_CNT_Q2) - (L2_EXT_AW_CNT_Q3)" />
|
||||
<event name="MaliExtBusWrSnoopFull" title="Output external WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_FULL" offset="45" />
|
||||
<event name="MaliExtBusWrSnoopPart" title="Output external WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_PTL" offset="46" />
|
||||
<event name="MaliExtBusWrStallCy" title="Output external write stall cycles" description="The number of cycles when a write is stalled waiting for the external bus." units="cycles" counter="L2_EXT_W_STALL" offset="48" />
|
||||
<event name="MaliExtBusWrStallRate" title="Output external write stall percentage" description="The percentage of cycles with an external write transaction stalled." units="percent" equation="((L2_EXT_W_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliL2CacheCleanUnique" title="Input internal clean unique requests" description="The number of L2 cache line clean unique requests from internal requesters." units="requests" counter="L2_RD_MSG_IN_CU" offset="13" />
|
||||
<event name="MaliL2CacheEvict" title="Input internal evict requests" description="The number of L2 cache line evict requests from internal requesters." units="requests" counter="L2_RD_MSG_IN_EVICT" offset="12" />
|
||||
<event name="MaliL2CacheFlush" title="L2 cache flush requests" description="The number of GPU L2 cache flushes performed." units="requests" counter="CACHE_FLUSH" offset="13" />
|
||||
<event name="MaliL2CacheFlushCy" title="L2 cache flush cycles" description="The number of cycles spent flushing GPU L2 caches." units="cycles" counter="CACHE_FLUSH_CYCLES" offset="12" />
|
||||
<event name="MaliL2CacheIncSnp" title="Input external snoop transactions" description="The number of coherency snoops triggered by external requesters." units="transactions" counter="L2_EXT_SNOOP" offset="52" />
|
||||
<event name="MaliL2CacheIncSnpStallCy" title="Input external snoop stall cycles" description="The number of cycles when a coherency snoop triggered by external requester is stalled." units="cycles" counter="L2_EXT_SNOOP_STALL" offset="53" />
|
||||
<event name="MaliL2CacheL1Rd" title="Output internal read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal requester." units="requests" counter="L2_RD_MSG_OUT" offset="22" />
|
||||
<event name="MaliL2CacheL1RdStallCy" title="Output internal read stall cycles" description="The number of cycles when L1 cache read requests sent by the L2 cache to an internal requester are stalled." units="cycles" counter="L2_RD_MSG_OUT_STALL" offset="23" />
|
||||
<event name="MaliL2CacheL1Wr" title="Output internal write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal requester." units="requests" counter="L2_WR_MSG_OUT" offset="24" />
|
||||
<event name="MaliL2CacheLookup" title="Any lookup requests" description="The number of L2 cache lookups performed." units="requests" counter="L2_ANY_LOOKUP" offset="25" />
|
||||
<event name="MaliL2CacheRd" title="Input internal read requests" description="The number of L2 cache read requests from internal requesters." units="requests" counter="L2_RD_MSG_IN" offset="16" />
|
||||
<event name="MaliL2CacheRdLookup" title="Read lookup requests" description="The number of L2 cache read lookups performed." units="requests" counter="L2_READ_LOOKUP" offset="26" />
|
||||
<event name="MaliL2CacheRdMissRate" title="L2 cache read miss percentage" description="The percentage of internal L2 cache reads that result in an external read." units="percent" equation="((L2_EXT_READ) / (L2_READ_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheRdStallCy" title="Input internal read stall cycles" description="The number of cycles L2 cache read requests from internal requesters are stalled." units="cycles" counter="L2_RD_MSG_IN_STALL" offset="17" />
|
||||
<event name="MaliL2CacheSnp" title="Input internal snoop requests" description="The number of L2 snoop requests from internal requesters." units="requests" counter="L2_SNP_MSG_IN" offset="20" />
|
||||
<event name="MaliL2CacheSnpLookup" title="Input external snoop lookup requests" description="The number of coherency snoop lookups performed that were triggered by an external requester." units="requests" counter="L2_EXT_SNOOP_LOOKUP" offset="28" />
|
||||
<event name="MaliL2CacheSnpStallCy" title="Input internal snoop stall cycles" description="The number of cycles when L2 cache snoop requests from internal requesters are stalled." units="cycles" counter="L2_SNP_MSG_IN_STALL" offset="21" />
|
||||
<event name="MaliL2CacheWr" title="Input internal write requests" description="The number of L2 cache write requests from internal requesters." units="requests" counter="L2_WR_MSG_IN" offset="18" />
|
||||
<event name="MaliL2CacheWrLookup" title="Write lookup requests" description="The number of L2 cache write lookups performed." units="requests" counter="L2_WRITE_LOOKUP" offset="27" />
|
||||
<event name="MaliL2CacheWrMissRate" title="L2 cache write miss percentage" description="The percentage of internal L2 cache writes that result in an external write." units="percent" equation="((L2_EXT_WRITE) / (L2_WRITE_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheWrStallCy" title="Input internal write stall cycles" description="The number of cycles when L2 cache write requests from internal requesters are stalled." units="cycles" counter="L2_WR_MSG_IN_STALL" offset="19" />
|
||||
<event name="MaliMMUL2Hit" title="MMU L2 lookup TLB hits" description="The number of level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L2" offset="8" />
|
||||
<event name="MaliMMUL2Rd" title="MMU L2 table read requests" description="The number of level 2 translation table reads." units="requests" counter="MMU_TABLE_READS_L2" offset="6" />
|
||||
<event name="MaliMMUL3Hit" title="MMU L3 lookup TLB hits" description="The number of level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L3" offset="7" />
|
||||
<event name="MaliMMUL3Rd" title="MMU L3 table read requests" description="The number of level 3 translation table reads." units="requests" counter="MMU_TABLE_READS_L3" offset="5" />
|
||||
<event name="MaliMMULookup" title="MMU lookup requests" description="The number of main MMU address translations performed." units="requests" counter="MMU_REQUESTS" offset="4" />
|
||||
</category>
|
||||
<category name="Shader Core">
|
||||
<event name="MaliALUIssueCy" title="Arithmetic unit issue cycles" description="The number of cycles the arithmetic unit was busy." units="cycles" equation="max((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU), (EXEC_INSTR_SFU) * 4)" />
|
||||
<event name="MaliALUUtil" title="Arithmetic unit utilization" description="The percentage utilization of the arithmetic unit." units="percent" equation="((max((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU), (EXEC_INSTR_SFU) * 4)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliAnyActiveCy" title="Any workload active cycles" description="The number of cycles when the shader core is processing either a non-fragment workload or a fragment workload." units="cycles" counter="SHADER_CORE_ACTIVE" offset="53" />
|
||||
<event name="MaliAnyUtil" title="Shader core clock ratio" description="An estimate of shader core use relative to the GPU top-level clock." units="percent" equation="((SHADER_CORE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliAttrInstr" title="Attribute instructions" description="The number of instructions run by the attribute unit." units="instructions" counter="ATTR_INSTR" offset="52" />
|
||||
<event name="MaliCoreActiveCy" title="Execution core active cycles" description="The number of cycles when the shader core is processing at least one warp." units="cycles" counter="EXEC_CORE_ACTIVE" offset="26" />
|
||||
<event name="MaliCoreAllRegsWarp" title="Warps using more than 32 registers" description="The number of warps that require more than 32 registers." units="warps" counter="WARP_REG_SIZE_64" offset="17" />
|
||||
<event name="MaliCoreAllRegsWarpRate" title="All registers warp percentage" description="The percentage of warps that require more than 32 registers." units="percent" equation="((WARP_REG_SIZE_64) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreFullWarp" title="Full warps" description="The number of warps that have a full thread slot allocation." units="warps" counter="FULL_QUAD_WARPS" offset="21" />
|
||||
<event name="MaliCoreFullWarpRate" title="Full warp percentage" description="The percentage of warps that have a full thread slot allocation." units="percent" equation="((FULL_QUAD_WARPS) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreUtil" title="Execution core utilization" description="The utilization of the programmable shader core." units="percent" equation="((EXEC_CORE_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngArithInstr" title="Arithmetic instruction issue cycles" description="The total number of instructions issued to the FMA, CVT, and SFU pipes." units="instructions" equation="(EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU)" />
|
||||
<event name="MaliEngCVTInstr" title="Arithmetic CVT pipe instructions" description="The number of instructions issued to the CVT pipe." units="instructions" counter="EXEC_INSTR_CVT" offset="28" />
|
||||
<event name="MaliEngCVTPipeUtil" title="CVT pipe utilization" description="Defines the utilization of the CVT pipe." units="percent" equation="((EXEC_INSTR_CVT) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngDivergedInstr" title="Diverged instructions" description="The number of instructions run per warp that have control flow divergence." units="instructions" counter="EXEC_INSTR_DIVERGED" offset="31" />
|
||||
<event name="MaliEngDivergedInstrRate" title="Warp divergence percentage" description="The percentage of instructions that have control flow divergence across the warp." units="percent" equation="((EXEC_INSTR_DIVERGED) / ((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU))) * 100" />
|
||||
<event name="MaliEngFMAInstr" title="Arithmetic FMA pipe instructions" description="The number of instructions issued to the FMA pipe." units="instructions" counter="EXEC_INSTR_FMA" offset="27" />
|
||||
<event name="MaliEngFMAPipeUtil" title="FMA pipe utilization" description="The utilization of the FMA pipe." units="percent" equation="((EXEC_INSTR_FMA) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngICacheMiss" title="Instruction cache misses" description="The number of instruction cache misses." units="requests" counter="EXEC_ICACHE_MISS" offset="32" />
|
||||
<event name="MaliEngSFUInstr" title="Arithmetic SFU pipe instructions" description="The number of instructions issued to the SFU pipe." units="instructions" counter="EXEC_INSTR_SFU" offset="29" />
|
||||
<event name="MaliEngSFUPipeUtil" title="SFU pipe utilization" description="The utilization of the SFU pipe." units="percent" equation="(((EXEC_INSTR_SFU) * 4) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngSWBlendInstr" title="Blend shader instructions" description="The number of blend shader invocations run." units="instructions" counter="CALL_BLEND_SHADER" offset="34" />
|
||||
<event name="MaliEngSWBlendRate" title="Shader blend percentage" description="The percentage of fragments that use shader-based blending." units="percent" equation="(((CALL_BLEND_SHADER) * 4) / (FRAG_WARPS)) * 100" />
|
||||
<event name="MaliEngStarveCy" title="Processing unit starvation cycles" description="The number of cycles when the processing unit is starved of work." units="cycles" counter="EXEC_STARVE_ARITH" offset="33" />
|
||||
<event name="MaliFragActiveCy" title="Fragment active cycles" description="The number of cycles when the shader core is processing a fragment workload." units="cycles" counter="FRAG_ACTIVE" offset="4" />
|
||||
<event name="MaliFragEZSKillQd" title="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_KILL" offset="14" />
|
||||
<event name="MaliFragEZSKillRate" title="Early ZS killed quad percentage" description="The percentage of rasterized quads that are killed by early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSTestQd" title="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_TEST" offset="12" />
|
||||
<event name="MaliFragEZSTestRate" title="Early ZS tested quad percentage" description="The percentage of rasterized quads that were subjected to early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSUpdateQd" title="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" counter="FRAG_QUADS_EZS_UPDATE" offset="13" />
|
||||
<event name="MaliFragEZSUpdateRate" title="Early ZS updated quad percentage" description="The percentage of rasterized quads that update the framebuffer during early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_UPDATE) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragFPKActiveCy" title="Fragment pre-pipe buffer active cycles" description="The number of cycles when at least one quad is present in the pre-pipe quad queue." units="cycles" counter="FRAG_FPK_ACTIVE" offset="7" />
|
||||
<event name="MaliFragFPKBUtil" title="Fragment pre-pipe buffer utilization" description="The percentage of cycles when at least one quad is buffered for fragment shading." units="percent" equation="((FRAG_FPK_ACTIVE) / (FRAG_ACTIVE)) * 100" />
|
||||
<event name="MaliFragFPKKillQd" title="FPK HSR killed quads" description="The number of quads that are killed by hidden surface removal." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (((FRAG_WARPS) * (16)) / 4)" />
|
||||
<event name="MaliFragFPKKillRate" title="FPK HSR killed quad percentage" description="The percentage of rasterized quads that are killed by hidden surface removal." units="percent" equation="(((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (((FRAG_WARPS) * (16)) / 4)) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSKillQd" title="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" counter="FRAG_LZS_KILL" offset="16" />
|
||||
<event name="MaliFragLZSKillRate" title="Late ZS killed quad percentage" description="The percentage of rasterized quads that are killed by late depth and stencil testing." units="percent" equation="((FRAG_LZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSTestQd" title="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" counter="FRAG_LZS_TEST" offset="15" />
|
||||
<event name="MaliFragLZSTestRate" title="Late ZS tested quad percentage" description="The percentage of rasterized quads that are tested by late depth and stencil testing." units="percent" equation="((FRAG_LZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragOpaqueQd" title="Occluding quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" counter="QUAD_FPK_KILLER" offset="20" />
|
||||
<event name="MaliFragOpaqueQdRate" title="Occluding quad percentage" description="The percentage of quads that are valid occluders for hidden surface removal." units="percent" equation="((QUAD_FPK_KILLER) / ((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL))) * 100" />
|
||||
<event name="MaliFragOverdraw" title="Fragments per pixel" description="The number of fragments shaded per output pixel." units="threads" equation="((FRAG_WARPS) * (16)) / ((ITER_FRAG_TASK_COMPLETED) * (32) * (32))" />
|
||||
<event name="MaliFragRastPartQd" title="Partial rasterized fine quads" description="The number of rasterized fine quads created with partial coverage." units="quads" counter="FRAG_PARTIAL_QUADS_RAST" offset="10" />
|
||||
<event name="MaliFragRastPartQdRate" title="Partial coverage percentage" description="The percentage of rasterized fine quads that contain samples with no coverage." units="percent" equation="((FRAG_PARTIAL_QUADS_RAST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragRastPrim" title="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" counter="FRAG_PRIM_RAST" offset="6" />
|
||||
<event name="MaliFragRastQd" title="Rasterized fine quads" description="The number of fine quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_RAST" offset="11" />
|
||||
<event name="MaliFragRdPrim" title="Fragment primitives loaded" description="The number of primitives loaded from the tile list by the fragment front-end." units="primitives" counter="FRAG_PRIMITIVES_OUT" offset="5" />
|
||||
<event name="MaliFragShadedQd" title="Shaded coarse quads" description="The number of 2x2 fragment quads that are fragment shaded." units="quads" equation="((FRAG_WARPS) * (16)) / 4" />
|
||||
<event name="MaliFragThread" title="Fragment threads" description="The number of fragment threads started." units="threads" equation="(FRAG_WARPS) * (16)" />
|
||||
<event name="MaliFragThroughputCy" title="Average cycles per fragment thread" description="The average number of shader core cycles per fragment thread." units="cycles" equation="(FRAG_ACTIVE) / ((FRAG_WARPS) * (16))" />
|
||||
<event name="MaliFragTile" title="Tiles" description="The number of tiles processed by the shader core." units="tiles" counter="FRAG_PTILES" offset="18" />
|
||||
<event name="MaliFragTileKill" title="Killed unchanged tiles" description="The number of tiles killed by transaction elimination." units="tiles" counter="FRAG_TRANS_ELIM" offset="19" />
|
||||
<event name="MaliFragTileKillRate" title="Unchanged tile kill percentage" description="The percentage of tiles that are killed by transaction elimination." units="percent" equation="((FRAG_TRANS_ELIM) / (4 * (FRAG_PTILES))) * 100" />
|
||||
<event name="MaliFragTransparentQd" title="Non-occluding quads" description="The number of quads that are not eligible to be a hidden surface removal occluder." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (QUAD_FPK_KILLER)" />
|
||||
<event name="MaliFragUtil" title="Fragment utilization" description="The utilization of the shader core fragment path." units="percent" equation="((FRAG_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliFragWarp" title="Fragment warps" description="The number of fragment warps created." units="warps" counter="FRAG_WARPS" offset="9" />
|
||||
<event name="MaliLSAtomic" title="Load/store unit atomic issues" description="The number of load/store atomic accesses." units="cycles" counter="LS_MEM_ATOMIC" offset="48" />
|
||||
<event name="MaliLSFullRd" title="Load/store unit full read issues" description="The number of full-width load/store cache reads." units="cycles" counter="LS_MEM_READ_FULL" offset="44" />
|
||||
<event name="MaliLSFullWr" title="Load/store unit full write issues" description="The number of full-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_FULL" offset="46" />
|
||||
<event name="MaliLSIssueCy" title="Load/store unit issue cycles" description="The total number of load/store issue cycles." units="cycles" equation="((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)" />
|
||||
<event name="MaliLSPartRd" title="Load/store unit partial read issues" description="The number of partial-width load/store cache reads." units="cycles" counter="LS_MEM_READ_SHORT" offset="45" />
|
||||
<event name="MaliLSPartWr" title="Load/store unit partial write issues" description="The number of partial-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_SHORT" offset="47" />
|
||||
<event name="MaliLSRdCy" title="Load/store unit read issues" description="The total number of load/store read cycles." units="cycles" equation="(LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)" />
|
||||
<event name="MaliLSUtil" title="Load/store unit utilization" description="The percentage utilization of the load/store unit." units="percent" equation="((((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliLSWrCy" title="Load/store unit write issues" description="The total number of load/store write cycles." units="cycles" equation="(LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)" />
|
||||
<event name="MaliNonFragActiveCy" title="Non-fragment active cycles" description="The number of cycles when the shader core is processing some non-fragment workload." units="cycles" counter="COMPUTE_ACTIVE" offset="22" />
|
||||
<event name="MaliNonFragTask" title="Non-fragment core tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" counter="COMPUTE_TASKS" offset="23" />
|
||||
<event name="MaliNonFragThread" title="Non-fragment threads" description="The number of non-fragment threads started." units="threads" equation="(COMPUTE_WARPS) * (16)" />
|
||||
<event name="MaliNonFragThroughputCy" title="Average cycles per non-fragment thread" description="The average number of shader core cycles per non-fragment thread." units="cycles" equation="(COMPUTE_ACTIVE) / ((COMPUTE_WARPS) * (16))" />
|
||||
<event name="MaliNonFragUtil" title="Non-fragment utilization" description="The utilization of the shader core non-fragment path." units="percent" equation="((COMPUTE_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragWarp" title="Non-fragment warps" description="The number of non-fragment warps created." units="warps" counter="COMPUTE_WARPS" offset="24" />
|
||||
<event name="MaliSCBusFFEExtRdBt" title="Fragment front-end read beats from external memory" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_FTC_EXT" offset="55" />
|
||||
<event name="MaliSCBusFFEExtRdBy" title="Fragment front-end read bytes from external memory" description="The total number of bytes read from the external memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC_EXT) * (16)" />
|
||||
<event name="MaliSCBusFFEL2RdBt" title="Fragment front-end read beats from L2 cache" description="The number of read beats received by the fixed-function fragment front-end." units="beats" counter="BEATS_RD_FTC" offset="54" />
|
||||
<event name="MaliSCBusFFEL2RdBy" title="Fragment front-end read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdBt" title="Load/store unit read beats from external memory" description="The number of read beats received by the load/store unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_LSC_EXT" offset="57" />
|
||||
<event name="MaliSCBusLSExtRdBy" title="Load/store unit read bytes from external memory" description="The total number of bytes read from the external memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC_EXT) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdByPerRd" title="Load/store unit bytes read from external memory per access cycle" description="The average number of bytes read from the external memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC_EXT) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSL2RdBt" title="Load/store unit read beats from L2 cache" description="The number of read beats received by the load/store unit." units="beats" counter="BEATS_RD_LSC" offset="56" />
|
||||
<event name="MaliSCBusLSL2RdBy" title="Load/store unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSL2RdByPerRd" title="Load/store unit bytes read from L2 per access cycle" description="The average number of bytes read from the L2 memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSOtherWrBt" title="Load/store unit other write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of any reason other than write-back." units="beats" counter="BEATS_WR_LSC_OTHER" offset="61" />
|
||||
<event name="MaliSCBusLSWBWrBt" title="Load/store unit write-back write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of write-back." units="beats" counter="BEATS_WR_LSC_WB" offset="63" />
|
||||
<event name="MaliSCBusLSWrBt" title="Load/store unit write beats to L2 memory system" description="The number of write beats sent by the load/store unit." units="beats" equation="(BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)" />
|
||||
<event name="MaliSCBusLSWrBy" title="Load/store unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the load/store unit." units="bytes" equation="((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)" />
|
||||
<event name="MaliSCBusLSWrByPerWr" title="Load/store unit bytes written to L2 per access cycle" description="The average number of bytes written to the L2 memory system by the load/store unit per write cycle." units="bytes" equation="(((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)) / ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT))" />
|
||||
<event name="MaliSCBusOtherL2RdBt" title="Miscellaneous read beats from L2 cache" description="The number of read beats received by a unit that is not specifically identified." units="beats" counter="BEATS_RD_OTHER" offset="60" />
|
||||
<event name="MaliSCBusTexExtRdBt" title="Texture unit read beats from external memory" description="The number of read beats received by the texture unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_TEX_EXT" offset="59" />
|
||||
<event name="MaliSCBusTexExtRdBy" title="Texture unit read bytes from external memory" description="The total number of bytes read from the external memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX_EXT) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdByPerRd" title="Texture unit bytes read from external memory per texture cycle" description="The average number of bytes read from the external memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX_EXT) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTexL2RdBt" title="Texture unit read beats from L2 cache" description="The number of read beats received by the texture unit." units="beats" counter="BEATS_RD_TEX" offset="58" />
|
||||
<event name="MaliSCBusTexL2RdBy" title="Texture unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX) * (16)" />
|
||||
<event name="MaliSCBusTexL2RdByPerRd" title="Texture unit bytes read from L2 per texture cycle" description="The average number of bytes read from the L2 memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTileWrBPerPx" title="Tile unit bytes written to L2 per pixel" description="The average number of bytes written to the L2 memory system by the tile unit per output pixel." units="bytes" equation="((BEATS_WR_TIB) * (16)) / ((ITER_FRAG_TASK_COMPLETED) * (32) * (32))" />
|
||||
<event name="MaliSCBusTileWrBt" title="Tile unit write beats to L2 memory system" description="The number of write beats sent by the tile write-back unit." units="beats" counter="BEATS_WR_TIB" offset="62" />
|
||||
<event name="MaliSCBusTileWrBy" title="Tile unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the tile write-back unit." units="bytes" equation="(BEATS_WR_TIB) * (16)" />
|
||||
<event name="MaliTexCPI" title="Texture filtering cycles per instruction" description="The average number of texture filtering cycles per instruction." units="cycles" equation="((TEX_FILT_NUM_OPERATIONS)) / (((TEX_MSGO_NUM_MSG) * 2) * 4)" />
|
||||
<event name="MaliTexDataFetchStallCy" title="Texture fetch stall cycles" description="The number of cycles when a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles" counter="TEX_TFCH_CLK_STALLED" offset="37" />
|
||||
<event name="MaliTexDescStallCy" title="Texture descriptor stall cycles" description="The number of cycles when a quad is stalled on texture descriptor fetch." units="cycles" counter="TEX_DFCH_CLK_STALLED" offset="36" />
|
||||
<event name="MaliTexFiltFullRate" title="Texture full speed filtering percentage" description="The percentage of texture filtering cycles using the full width of the texture filtering data path." units="percent" equation="(((TEX_FILT_NUM_FXR_OPERATIONS) + (TEX_FILT_NUM_FST_OPERATIONS)) / (TEX_FILT_NUM_OPERATIONS)) * 100" />
|
||||
<event name="MaliTexFiltIssueCy" title="Texture filtering cycles" description="The number of texture filtering issue cycles." units="cycles" counter="TEX_FILT_NUM_OPERATIONS" offset="39" />
|
||||
<event name="MaliTexFiltStallCy" title="Texture filtering stall cycles" description="The number of cycles when the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" offset="38" />
|
||||
<event name="MaliTexFullBiFiltCy" title="Texture filtering cycles using full bilinear" description="The number of cycles when the filtering unit is filled with bilinear filtering." units="cycles" counter="TEX_FILT_NUM_FXR_OPERATIONS" offset="40" />
|
||||
<event name="MaliTexFullTriFiltCy" title="Texture filtering cycles using full trilinear" description="The number of cycles when the filtering unit is filled with trilinear filtering." units="cycles" counter="TEX_FILT_NUM_FST_OPERATIONS" offset="41" />
|
||||
<event name="MaliTexInBt" title="Texture message read beats" description="The number of texture request message data beats." units="beats" counter="TEX_MSGI_NUM_FLITS" offset="35" />
|
||||
<event name="MaliTexInBusUtil" title="Texture input bus utilization" description="The percentage load on the texture message input bus." units="percent" equation="((TEX_MSGI_NUM_FLITS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexIssueCy" title="Texture unit issue cycles" description="The number of cycles the texture unit was busy." units="cycles" equation="(TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliTexOutBt" title="Texture message write beats" description="The number of texture response message data beats." units="beats" counter="TEX_MSGO_NUM_FLITS" offset="43" />
|
||||
<event name="MaliTexOutBusUtil" title="Texture output bus utilization" description="The percentage load on the texture message output bus." units="percent" equation="((TEX_MSGO_NUM_FLITS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexOutMsg" title="Texture messages" description="The number of output texture messages from the texture unit." units="issues" counter="TEX_MSGO_NUM_MSG" offset="42" />
|
||||
<event name="MaliTexQuads" title="Texture quads" description="The number of quad-width texture operations processed by the texture unit." units="quads" equation="(TEX_MSGO_NUM_MSG) * 2" />
|
||||
<event name="MaliTexSample" title="Texture samples" description="The number of texture samples made." units="requests" equation="((TEX_MSGO_NUM_MSG) * 2) * 4" />
|
||||
<event name="MaliTexUtil" title="Texture unit utilization" description="The percentage utilization of the texturing unit." units="percent" equation="(((TEX_FILT_NUM_OPERATIONS)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliVar16IssueCy" title="16-bit interpolation issue cycles" description="The number of 16-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_16) / (2)" />
|
||||
<event name="MaliVar16IssueSlot" title="16-bit interpolation slots" description="The number of 16-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_16" offset="51" />
|
||||
<event name="MaliVar32IssueCy" title="32-bit interpolation issue cycles" description="The number of 32-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_32) / (2)" />
|
||||
<event name="MaliVar32IssueSlot" title="32-bit interpolation slots" description="The number of 32-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_32" offset="50" />
|
||||
<event name="MaliVarInstr" title="Varying unit instructions" description="The number of warp-width interpolation operations processed by the varying unit." units="requests" counter="VARY_INSTR" offset="49" />
|
||||
<event name="MaliVarIssueCy" title="Varying unit issue cycles" description="The total number of varying unit issue cycles." units="cycles" equation="((VARY_SLOT_32) / (2)) + ((VARY_SLOT_16) / (2))" />
|
||||
<event name="MaliVarUtil" title="Varying unit utilization" description="The percentage utilization of the varying unit." units="percent" equation="((((VARY_SLOT_32) / (2)) + ((VARY_SLOT_16) / (2))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
</category>
|
||||
<category name="Tiler">
|
||||
<event name="MaliGeomBackFacePrim" title="Visible back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" counter="BACK_FACING" offset="10" />
|
||||
<event name="MaliGeomFaceXYPlaneCullPrim" title="Facing or XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" counter="PRIM_CULLED" offset="12" />
|
||||
<event name="MaliGeomFaceXYPlaneCullRate" title="Facing or XY plane test cull percentage" description="The percentage of primitives culled by the facing or frustum XY plane tests." units="percent" equation="((PRIM_CULLED) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomFrontFacePrim" title="Visible front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" counter="FRONT_FACING" offset="9" />
|
||||
<event name="MaliGeomLinePrim" title="Line primitives" description="The number of input line primitives." units="primitives" counter="LINES" offset="7" />
|
||||
<event name="MaliGeomPointPrim" title="Point primitives" description="The number of input point primitives." units="primitives" counter="POINTS" offset="8" />
|
||||
<event name="MaliGeomPosShadTask" title="Tiler position shading requests" description="The number of position shading requests in the tiler geometry flow." units="requests" counter="IDVS_POS_SHAD_REQ" offset="21" />
|
||||
<event name="MaliGeomPosShadThread" title="Position shader thread invocations" description="The number of position shader thread invocations." units="threads" equation="(IDVS_POS_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomPosShadThreadPerPrim" title="Position threads per input primitive" description="The number of position shader invocations per input primitive." units="threads" equation="((IDVS_POS_SHAD_REQ) * (4)) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))" />
|
||||
<event name="MaliGeomSampleCullPrim" title="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" counter="PRIM_SAT_CULLED" offset="14" />
|
||||
<event name="MaliGeomSampleCullRate" title="Sample test cull percentage" description="The percentage of primitives culled by the sample coverage test." units="percent" equation="((PRIM_SAT_CULLED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED) - (PRIM_CLIPPED))) * 100" />
|
||||
<event name="MaliGeomTotalCullPrim" title="Culled primitives" description="The number of primitives that were culled during the rendering process." units="primitives" equation="(PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)" />
|
||||
<event name="MaliGeomTotalPrim" title="Total input primitives" description="The total number of input primitives to the rendering process." units="primitives" equation="((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomTrianglePrim" title="Triangle primitives" description="The number of input triangle primitives." units="primitives" counter="TRIANGLES" offset="6" />
|
||||
<event name="MaliGeomVarShadTask" title="Tiler varying shading requests" description="The number of varying shading requests in the tiler geometry flow." units="requests" counter="IDVS_VAR_SHAD_REQ" offset="37" />
|
||||
<event name="MaliGeomVarShadThread" title="Varying shader thread invocations" description="The number of varying shader thread invocations." units="threads" equation="(IDVS_VAR_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomVarShadThreadPerPrim" title="Varying threads per input primitive" description="The number of varying shader invocations per visible primitive." units="threads" equation="((IDVS_VAR_SHAD_REQ) * (4)) / (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomVisiblePrim" title="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" counter="PRIM_VISIBLE" offset="11" />
|
||||
<event name="MaliGeomVisibleRate" title="Visible primitive percentage" description="The percentage of primitives that are visible after culling." units="percent" equation="((PRIM_VISIBLE) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomZPlaneCullPrim" title="Z plane culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" counter="PRIM_CLIPPED" offset="13" />
|
||||
<event name="MaliGeomZPlaneCullRate" title="Z plane test cull percentage" description="The percentage of primitives culled by the frustum Z plane test." units="percent" equation="((PRIM_CLIPPED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED))) * 100" />
|
||||
<event name="MaliTilerActiveCy" title="Tiler active cycles" description="The number of cycles when the tiler has a workload queued for processing." units="cycles" counter="TILER_ACTIVE" offset="4" />
|
||||
<event name="MaliTilerPosCacheHit" title="Position cache hit requests" description="The number of position lookups that result in a hit in the vertex cache." units="requests" counter="VCACHE_HIT" offset="26" />
|
||||
<event name="MaliTilerPosCacheHitRate" title="Position cache hit percentage" description="The percentage hit rate of the tiler position cache." units="percent" equation="((VCACHE_HIT) / ((VCACHE_HIT) + (VCACHE_MISS))) * 100" />
|
||||
<event name="MaliTilerPosCacheMiss" title="Position cache miss requests" description="The number of position lookups that miss in the vertex cache." units="requests" counter="VCACHE_MISS" offset="27" />
|
||||
<event name="MaliTilerPosShadFIFOFullCy" title="Tiler position FIFO full cycles" description="The number of cycles when the tiler has a stalled position shading buffer." units="cycles" counter="IDVS_POS_FIFO_FULL" offset="24" />
|
||||
<event name="MaliTilerPosShadStallCy" title="Tiler position shading stall cycles" description="The number of cycles when the tiler has a stalled position shading request." units="cycles" counter="IDVS_POS_SHAD_STALL" offset="23" />
|
||||
<event name="MaliTilerRdBt" title="Output internal read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" counter="BUS_READ" offset="17" />
|
||||
<event name="MaliTilerUtil" title="Tiler utilization" description="The percentage of GPU active cycles when the tiler has a workload queued for processing." units="percent" equation="((TILER_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliTilerVarCacheHit" title="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" counter="IDVS_VBU_HIT" offset="34" />
|
||||
<event name="MaliTilerVarCacheHitRate" title="Varying cache hit percentage" description="The percentage hit rate of the tiler varying cache." units="percent" equation="((IDVS_VBU_HIT) / ((IDVS_VBU_HIT) + (IDVS_VBU_MISS))) * 100" />
|
||||
<event name="MaliTilerVarCacheMiss" title="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" counter="IDVS_VBU_MISS" offset="35" />
|
||||
<event name="MaliTilerVarShadStallCy" title="Tiler varying shading stall cycles" description="The number of cycles when the tiler has a stalled varying shading request." units="cycles" counter="IDVS_VAR_SHAD_STALL" offset="38" />
|
||||
</category>
|
||||
</metrics>
|
||||
294
src/panfrost/perf/generated/G715.xml
Normal file
294
src/panfrost/perf/generated/G715.xml
Normal file
|
|
@ -0,0 +1,294 @@
|
|||
<!--
|
||||
Copyright (c) 2026 Arm, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
Generated from libGPUCounters @ 4ea7c0127fe2942a00e4a1123bb62c625a401f93.
|
||||
https://github.com/ARM-software/libGPUCounters
|
||||
which is:
|
||||
Copyright (c) 2023-2025 Arm Limited
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
|
||||
<metrics id="G715">
|
||||
<category name="GPU Front-end">
|
||||
<event name="MaliCS0WaitStallCy" title="Command stream 0 wait stall cycles" description="The number of cycles that command stream interface 0 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF0_WAIT_BLOCKED" offset="51" />
|
||||
<event name="MaliCS1WaitStallCy" title="Command stream 1 wait stall cycles" description="The number of cycles that command stream interface 1 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF1_WAIT_BLOCKED" offset="55" />
|
||||
<event name="MaliCS2WaitStallCy" title="Command stream 2 wait stall cycles" description="The number of cycles that command stream interface 2 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF2_WAIT_BLOCKED" offset="59" />
|
||||
<event name="MaliCS3WaitStallCy" title="Command stream 3 wait stall cycles" description="The number of cycles that command stream interface 3 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF3_WAIT_BLOCKED" offset="63" />
|
||||
<event name="MaliCSFCEUActiveCy" title="Command execution unit active cycles" description="The number of cycles that the CEU is processing commands." units="cycles" counter="CEU_ACTIVE" offset="40" />
|
||||
<event name="MaliCSFCEUUtil" title="Command execution unit utilization" description="The CSF command execution unit utilization compared against the GPU active cycles." units="percent" equation="((CEU_ACTIVE) / ((GPU_ITER_ACTIVE))) * 100" />
|
||||
<event name="MaliCSFCS0ActiveCy" title="Command stream 0 active cycles" description="The number of cycles that command stream interface 0 contained an enabled command stream." units="cycles" counter="CSHWIF0_ENABLED" offset="48" />
|
||||
<event name="MaliCSFCS1ActiveCy" title="Command stream 1 active cycles" description="The number of cycles that command stream interface 1 contained an enabled command stream." units="cycles" counter="CSHWIF1_ENABLED" offset="52" />
|
||||
<event name="MaliCSFCS2ActiveCy" title="Command stream 2 active cycles" description="The number of cycles that command stream interface 2 contained an enabled command stream." units="cycles" counter="CSHWIF2_ENABLED" offset="56" />
|
||||
<event name="MaliCSFCS3ActiveCy" title="Command stream 3 active cycles" description="The number of cycles that command stream interface 3 contained an enabled command stream." units="cycles" counter="CSHWIF3_ENABLED" offset="60" />
|
||||
<event name="MaliCSFLSUActiveCy" title="Command load/store unit active cycles" description="The number of cycles that the load-store unit is processing commands." units="cycles" counter="LSU_ACTIVE" offset="45" />
|
||||
<event name="MaliCSFLSUUtil" title="Command load/store unit utilization" description="The CSF load/store unit utilization compared against the GPU active cycles." units="percent" equation="((LSU_ACTIVE) / ((GPU_ITER_ACTIVE))) * 100" />
|
||||
<event name="MaliCSFMCUActiveCy" title="MCU active cycles" description="The number of cycles when the CSF front-end MCU is actively processing." units="cycles" counter="MCU_ACTIVE" offset="5" />
|
||||
<event name="MaliCSFMCUUtil" title="Microcontroller utilization" description="The CSF MCU utilization compared against the GPU active cycles." units="percent" equation="((MCU_ACTIVE) / ((GPU_ITER_ACTIVE))) * 100" />
|
||||
<event name="MaliCompQueueActiveCy" title="Compute queue active cycles" description="The number of cycles that the compute queue is processing work." units="cycles" equation="(ITER_COMP_ACTIVE) - (ITER_COMP_READY_BLOCKED)" />
|
||||
<event name="MaliCompQueueAssignStallCy" title="Compute queue endpoint stall cycles" description="The number of cycles the compute queue is waiting for endpoints to be assigned." units="cycles" counter="ITER_COMP_READY_BLOCKED" offset="30" />
|
||||
<event name="MaliCompQueueDrainStallCy" title="Compute queue endpoint drain stall cycles" description="The number of cycles the compute queue is waiting for endpoints to drain." units="cycles" counter="ITER_COMP_EP_DRAIN" offset="31" />
|
||||
<event name="MaliCompQueueIRQActiveCy" title="Compute queue interrupt pending cycles" description="The number of cycles that the compute queue IRQ was pending." units="cycles" counter="ITER_COMP_IRQ_ACTIVE" offset="28" />
|
||||
<event name="MaliCompQueueJob" title="Compute jobs" description="The number of compute jobs processed." units="jobs" counter="ITER_COMP_JOB_COMPLETED" offset="25" />
|
||||
<event name="MaliCompQueueTask" title="Compute tasks" description="The number of compute tasks processed." units="tasks" counter="ITER_COMP_TASK_COMPLETED" offset="26" />
|
||||
<event name="MaliCompQueueUtil" title="Compute queue utilization" description="The compute queue utilization compared against the GPU active cycles." units="percent" equation="(((ITER_COMP_ACTIVE) - (ITER_COMP_READY_BLOCKED)) / ((GPU_ITER_ACTIVE))) * 100" />
|
||||
<event name="MaliCompQueuedCy" title="Compute work queued cycles" description="The number of cycles that the compute queue has work queued." units="cycles" counter="ITER_COMP_ACTIVE" offset="24" />
|
||||
<event name="MaliFragQueueActiveCy" title="Fragment queue active cycles" description="The number of cycles that the fragment queue is processing work." units="cycles" equation="(ITER_FRAG_ACTIVE) - (ITER_FRAG_READY_BLOCKED)" />
|
||||
<event name="MaliFragQueueAssignStallCy" title="Fragment queue endpoint stall cycles" description="The number of cycles the fragment queue is waiting for endpoints to be assigned." units="cycles" counter="ITER_FRAG_READY_BLOCKED" offset="38" />
|
||||
<event name="MaliFragQueueIRQActiveCy" title="Fragment queue interrupt pending cycles" description="The number of cycles that the fragment queue IRQ was pending." units="cycles" counter="ITER_FRAG_IRQ_ACTIVE" offset="36" />
|
||||
<event name="MaliFragQueueJob" title="Fragment jobs" description="The number of fragment jobs processed." units="jobs" counter="ITER_FRAG_JOB_COMPLETED" offset="33" />
|
||||
<event name="MaliFragQueueTask" title="Fragment tasks" description="The number of fragment tasks processed." units="tasks" counter="ITER_FRAG_TASK_COMPLETED" offset="34" />
|
||||
<event name="MaliFragQueueUtil" title="Fragment queue utilization" description="The fragment queue utilization compared against the GPU active cycles." units="percent" equation="(((ITER_FRAG_ACTIVE) - (ITER_FRAG_READY_BLOCKED)) / ((GPU_ITER_ACTIVE))) * 100" />
|
||||
<event name="MaliFragQueuedCy" title="Fragment work queued cycles" description="The number of cycles that the fragment queue has work queued." units="cycles" counter="ITER_FRAG_ACTIVE" offset="32" />
|
||||
<event name="MaliGPUActiveCy" title="GPU active cycles" description="The number of cycles when the GPU has a workload of any type queued for processing." units="cycles" equation="(GPU_ITER_ACTIVE)" />
|
||||
<event name="MaliGPUActiveRawCy" title="GPU active raw cycles" description="The number of cycles when the GPU has a workload of any type queued for processing." units="cycles" counter="GPU_ACTIVE" offset="4" />
|
||||
<event name="MaliGPUAnyQueueActiveCy" title="Any queue active cycles" description="The number of cycles when any GPU queue is active." units="cycles" counter="GPU_ITER_ACTIVE" offset="6" />
|
||||
<event name="MaliGPUIRQ" title="GPU interrupts" description="The number of interrupts raised by the GPU to the CPU." units="interrupts" counter="GPU_IRQ_COUNT" offset="11" />
|
||||
<event name="MaliGPUIRQActiveCy" title="GPU interrupt pending cycles" description="The number of cycles when the GPU has a pending interrupt." units="cycles" counter="IRQ_ACTIVE" offset="10" />
|
||||
<event name="MaliGPUIRQUtil" title="Interrupt pending utilization" description="The IRQ pending utilization compared against the GPU active cycles." units="percent" equation="((IRQ_ACTIVE) / ((GPU_ITER_ACTIVE))) * 100" />
|
||||
<event name="MaliTilerQueueDrainStallCy" title="Vertex queue endpoint drain stall cycles" description="The number of cycles the vertex queue is waiting for endpoints to drain." units="cycles" counter="ITER_TILER_EP_DRAIN" offset="23" />
|
||||
<event name="MaliVertQueueActiveCy" title="Vertex queue active cycles" description="The number of cycles that the vertex queue is processing work." units="cycles" equation="(ITER_TILER_ACTIVE) - (ITER_TILER_READY_BLOCKED)" />
|
||||
<event name="MaliVertQueueAssignStallCy" title="Vertex queue endpoint stall cycles" description="The number of cycles the vertex queue is waiting for endpoints to be assigned." units="cycles" counter="ITER_TILER_READY_BLOCKED" offset="22" />
|
||||
<event name="MaliVertQueueIRQActiveCy" title="Vertex queue interrupt pending cycles" description="The number of cycles that the vertex queue IRQ was pending." units="cycles" counter="ITER_TILER_IRQ_ACTIVE" offset="20" />
|
||||
<event name="MaliVertQueueJob" title="Vertex jobs" description="The number of vertex jobs processed." units="jobs" counter="ITER_TILER_JOB_COMPLETED" offset="17" />
|
||||
<event name="MaliVertQueueTask" title="Vertex tasks" description="The number of vertex tasks processed." units="tasks" counter="ITER_TILER_IDVS_TASK_COMPLETED" offset="18" />
|
||||
<event name="MaliVertQueueUtil" title="Vertex queue utilization" description="The vertex queue utilization compared against the GPU active cycles." units="percent" equation="(((ITER_TILER_ACTIVE) - (ITER_TILER_READY_BLOCKED)) / ((GPU_ITER_ACTIVE))) * 100" />
|
||||
<event name="MaliVertQueuedCy" title="Vertex work queued cycles" description="The number of cycles that the vertex shading queue has work queued." units="cycles" counter="ITER_TILER_ACTIVE" offset="16" />
|
||||
</category>
|
||||
<category name="Memory System">
|
||||
<event name="MaliExtBusRd" title="Output external read transactions" description="The number of external read transactions." units="transactions" counter="L2_EXT_READ" offset="29" />
|
||||
<event name="MaliExtBusRdBt" title="Output external read beats" description="The number of external bus data read cycles." units="beats" counter="L2_EXT_READ_BEATS" offset="32" />
|
||||
<event name="MaliExtBusRdBy" title="Output external read bytes" description="The total output read bandwidth for the GPU." units="bytes" equation="(L2_EXT_READ_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusRdLat0" title="Output external read latency 0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" counter="L2_EXT_RRESP_0_127" offset="37" />
|
||||
<event name="MaliExtBusRdLat128" title="Output external read latency 128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" counter="L2_EXT_RRESP_128_191" offset="38" />
|
||||
<event name="MaliExtBusRdLat192" title="Output external read latency 192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" counter="L2_EXT_RRESP_192_255" offset="39" />
|
||||
<event name="MaliExtBusRdLat256" title="Output external read latency 256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" counter="L2_EXT_RRESP_256_319" offset="40" />
|
||||
<event name="MaliExtBusRdLat320" title="Output external read latency 320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" counter="L2_EXT_RRESP_320_383" offset="41" />
|
||||
<event name="MaliExtBusRdLat384" title="Output external read latency 384+ cycles" description="The number of read beats that are returned at least 384 cycles after the transaction started." units="beats" equation="(L2_EXT_READ_BEATS) - (L2_EXT_RRESP_0_127) - (L2_EXT_RRESP_128_191) - (L2_EXT_RRESP_192_255) - (L2_EXT_RRESP_256_319) - (L2_EXT_RRESP_320_383)" />
|
||||
<event name="MaliExtBusRdNoSnoop" title="Output external ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" counter="L2_EXT_READ_NOSNP" offset="30" />
|
||||
<event name="MaliExtBusRdOTQ1" title="Output external outstanding reads 0-25%" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q1" offset="34" />
|
||||
<event name="MaliExtBusRdOTQ2" title="Output external outstanding reads 25-50%" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q2" offset="35" />
|
||||
<event name="MaliExtBusRdOTQ3" title="Output external outstanding reads 50-75%" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q3" offset="36" />
|
||||
<event name="MaliExtBusRdOTQ4" title="Output external outstanding reads 75-100%" description="The number of read transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_READ) - (L2_EXT_AR_CNT_Q1) - (L2_EXT_AR_CNT_Q2) - (L2_EXT_AR_CNT_Q3)" />
|
||||
<event name="MaliExtBusRdStallCy" title="Output external read stall cycles" description="The number of cycles when a read is stalled waiting for the external bus." units="cycles" counter="L2_EXT_AR_STALL" offset="33" />
|
||||
<event name="MaliExtBusRdStallRate" title="Output external read stall percentage" description="The percentage of cycles with an external read transaction stalled." units="percent" equation="((L2_EXT_AR_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / ((GPU_ITER_ACTIVE))) * 100" />
|
||||
<event name="MaliExtBusRdUnique" title="Output external ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" counter="L2_EXT_READ_UNIQUE" offset="31" />
|
||||
<event name="MaliExtBusWr" title="Output external write transactions" description="The number of external write transactions." units="transactions" counter="L2_EXT_WRITE" offset="42" />
|
||||
<event name="MaliExtBusWrBt" title="Output external write beats" description="The number of external bus data write cycles." units="beats" counter="L2_EXT_WRITE_BEATS" offset="47" />
|
||||
<event name="MaliExtBusWrBy" title="Output external write bytes" description="The total output write bandwidth for the GPU." units="bytes" equation="(L2_EXT_WRITE_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusWrNoSnoopFull" title="Output external WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_FULL" offset="43" />
|
||||
<event name="MaliExtBusWrNoSnoopPart" title="Output external WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_PTL" offset="44" />
|
||||
<event name="MaliExtBusWrOTQ1" title="Output external outstanding writes 0-25%" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q1" offset="49" />
|
||||
<event name="MaliExtBusWrOTQ2" title="Output external outstanding writes 25-50%" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q2" offset="50" />
|
||||
<event name="MaliExtBusWrOTQ3" title="Output external outstanding writes 50-75%" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q3" offset="51" />
|
||||
<event name="MaliExtBusWrOTQ4" title="Output external outstanding writes 75-100%" description="The number of write transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_WRITE) - (L2_EXT_AW_CNT_Q1) - (L2_EXT_AW_CNT_Q2) - (L2_EXT_AW_CNT_Q3)" />
|
||||
<event name="MaliExtBusWrSnoopFull" title="Output external WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_FULL" offset="45" />
|
||||
<event name="MaliExtBusWrSnoopPart" title="Output external WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_PTL" offset="46" />
|
||||
<event name="MaliExtBusWrStallCy" title="Output external write stall cycles" description="The number of cycles when a write is stalled waiting for the external bus." units="cycles" counter="L2_EXT_W_STALL" offset="48" />
|
||||
<event name="MaliExtBusWrStallRate" title="Output external write stall percentage" description="The percentage of cycles with an external write transaction stalled." units="percent" equation="((L2_EXT_W_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / ((GPU_ITER_ACTIVE))) * 100" />
|
||||
<event name="MaliL2CacheCleanUnique" title="Input internal clean unique requests" description="The number of L2 cache line clean unique requests from internal requesters." units="requests" counter="L2_RD_MSG_IN_CU" offset="13" />
|
||||
<event name="MaliL2CacheEvict" title="Input internal evict requests" description="The number of L2 cache line evict requests from internal requesters." units="requests" counter="L2_RD_MSG_IN_EVICT" offset="12" />
|
||||
<event name="MaliL2CacheFlush" title="L2 cache flush requests" description="The number of GPU L2 cache flushes performed." units="requests" counter="CACHE_FLUSH" offset="13" />
|
||||
<event name="MaliL2CacheFlushCy" title="L2 cache flush cycles" description="The number of cycles spent flushing GPU L2 caches." units="cycles" counter="CACHE_FLUSH_CYCLES" offset="12" />
|
||||
<event name="MaliL2CacheIncSnp" title="Input external snoop transactions" description="The number of coherency snoops triggered by external requesters." units="transactions" counter="L2_EXT_SNOOP" offset="52" />
|
||||
<event name="MaliL2CacheIncSnpStallCy" title="Input external snoop stall cycles" description="The number of cycles when a coherency snoop triggered by external requester is stalled." units="cycles" counter="L2_EXT_SNOOP_STALL" offset="53" />
|
||||
<event name="MaliL2CacheL1Rd" title="Output internal read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal requester." units="requests" counter="L2_RD_MSG_OUT" offset="22" />
|
||||
<event name="MaliL2CacheL1RdStallCy" title="Output internal read stall cycles" description="The number of cycles when L1 cache read requests sent by the L2 cache to an internal requester are stalled." units="cycles" counter="L2_RD_MSG_OUT_STALL" offset="23" />
|
||||
<event name="MaliL2CacheL1Wr" title="Output internal write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal requester." units="requests" counter="L2_WR_MSG_OUT" offset="24" />
|
||||
<event name="MaliL2CacheLookup" title="Any lookup requests" description="The number of L2 cache lookups performed." units="requests" counter="L2_ANY_LOOKUP" offset="25" />
|
||||
<event name="MaliL2CacheRd" title="Input internal read requests" description="The number of L2 cache read requests from internal requesters." units="requests" counter="L2_RD_MSG_IN" offset="16" />
|
||||
<event name="MaliL2CacheRdLookup" title="Read lookup requests" description="The number of L2 cache read lookups performed." units="requests" counter="L2_READ_LOOKUP" offset="26" />
|
||||
<event name="MaliL2CacheRdMissRate" title="L2 cache read miss percentage" description="The percentage of internal L2 cache reads that result in an external read." units="percent" equation="((L2_EXT_READ) / (L2_READ_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheRdStallCy" title="Input internal read stall cycles" description="The number of cycles L2 cache read requests from internal requesters are stalled." units="cycles" counter="L2_RD_MSG_IN_STALL" offset="17" />
|
||||
<event name="MaliL2CacheSnp" title="Input internal snoop requests" description="The number of L2 snoop requests from internal requesters." units="requests" counter="L2_SNP_MSG_IN" offset="20" />
|
||||
<event name="MaliL2CacheSnpLookup" title="Input external snoop lookup requests" description="The number of coherency snoop lookups performed that were triggered by an external requester." units="requests" counter="L2_EXT_SNOOP_LOOKUP" offset="28" />
|
||||
<event name="MaliL2CacheSnpStallCy" title="Input internal snoop stall cycles" description="The number of cycles when L2 cache snoop requests from internal requesters are stalled." units="cycles" counter="L2_SNP_MSG_IN_STALL" offset="21" />
|
||||
<event name="MaliL2CacheWr" title="Input internal write requests" description="The number of L2 cache write requests from internal requesters." units="requests" counter="L2_WR_MSG_IN" offset="18" />
|
||||
<event name="MaliL2CacheWrLookup" title="Write lookup requests" description="The number of L2 cache write lookups performed." units="requests" counter="L2_WRITE_LOOKUP" offset="27" />
|
||||
<event name="MaliL2CacheWrMissRate" title="L2 cache write miss percentage" description="The percentage of internal L2 cache writes that result in an external write." units="percent" equation="((L2_EXT_WRITE) / (L2_WRITE_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheWrStallCy" title="Input internal write stall cycles" description="The number of cycles when L2 cache write requests from internal requesters are stalled." units="cycles" counter="L2_WR_MSG_IN_STALL" offset="19" />
|
||||
<event name="MaliMMUL2Hit" title="MMU L2 lookup TLB hits" description="The number of level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L2" offset="8" />
|
||||
<event name="MaliMMUL2Rd" title="MMU L2 table read requests" description="The number of level 2 translation table reads." units="requests" counter="MMU_TABLE_READS_L2" offset="6" />
|
||||
<event name="MaliMMUL3Hit" title="MMU L3 lookup TLB hits" description="The number of level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L3" offset="7" />
|
||||
<event name="MaliMMUL3Rd" title="MMU L3 table read requests" description="The number of level 3 translation table reads." units="requests" counter="MMU_TABLE_READS_L3" offset="5" />
|
||||
<event name="MaliMMULookup" title="MMU lookup requests" description="The number of main MMU address translations performed." units="requests" counter="MMU_REQUESTS" offset="4" />
|
||||
</category>
|
||||
<category name="Shader Core">
|
||||
<event name="MaliALUIssueCy" title="Arithmetic unit issue cycles" description="The number of cycles the arithmetic unit was busy." units="cycles" equation="max((EXEC_INSTR_CVT) + (EXEC_INSTR_SFU) + (((EXEC_INSTR_FMA) - min((EXEC_INSTR_FMA), (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU))) / 2), (EXEC_INSTR_SFU) * 4)" />
|
||||
<event name="MaliALUUtil" title="Arithmetic unit utilization" description="The percentage utilization of the arithmetic unit." units="percent" equation="((max((EXEC_INSTR_CVT) + (EXEC_INSTR_SFU) + (((EXEC_INSTR_FMA) - min((EXEC_INSTR_FMA), (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU))) / 2), (EXEC_INSTR_SFU) * 4)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliAnyActiveCy" title="Any workload active cycles" description="The number of cycles when the shader core is processing either a non-fragment workload or a fragment workload." units="cycles" counter="SHADER_CORE_ACTIVE" offset="53" />
|
||||
<event name="MaliAnyUtil" title="Shader core clock ratio" description="An estimate of shader core use relative to the GPU top-level clock." units="percent" equation="((SHADER_CORE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / ((GPU_ITER_ACTIVE))) * 100" />
|
||||
<event name="MaliAttrInstr" title="Attribute instructions" description="The number of instructions run by the attribute unit." units="instructions" counter="ATTR_INSTR" offset="52" />
|
||||
<event name="MaliCoreActiveCy" title="Execution core active cycles" description="The number of cycles when the shader core is processing at least one warp." units="cycles" counter="EXEC_CORE_ACTIVE" offset="26" />
|
||||
<event name="MaliCoreAllRegsWarp" title="Warps using more than 32 registers" description="The number of warps that require more than 32 registers." units="warps" counter="WARP_REG_SIZE_64" offset="17" />
|
||||
<event name="MaliCoreAllRegsWarpRate" title="All registers warp percentage" description="The percentage of warps that require more than 32 registers." units="percent" equation="((WARP_REG_SIZE_64) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreFragWarpOcc" title="Fragment warp occupancy" description="The thread occupancy of the fragment warps." units="percent" equation="((FRAG_SHADER_THREADS) / ((FRAG_WARPS) * (16))) * 100" />
|
||||
<event name="MaliCoreFullWarp" title="Full warps" description="The number of warps that have a full thread slot allocation." units="warps" counter="FULL_QUAD_WARPS" offset="21" />
|
||||
<event name="MaliCoreFullWarpRate" title="Full warp percentage" description="The percentage of warps that have a full thread slot allocation." units="percent" equation="((FULL_QUAD_WARPS) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreUtil" title="Execution core utilization" description="The utilization of the programmable shader core." units="percent" equation="((EXEC_CORE_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngArithInstr" title="Arithmetic instruction issue cycles" description="The total number of instructions issued to the FMA, CVT, and SFU pipes." units="instructions" equation="(EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU)" />
|
||||
<event name="MaliEngCVTInstr" title="Arithmetic CVT pipe instructions" description="The number of instructions issued to the CVT pipe." units="instructions" counter="EXEC_INSTR_CVT" offset="28" />
|
||||
<event name="MaliEngCVTPipeUtil" title="CVT pipe utilization" description="Defines the utilization of the CVT pipe." units="percent" equation="((EXEC_INSTR_CVT) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngDivergedInstr" title="Diverged instructions" description="The number of instructions run per warp that have control flow divergence." units="instructions" counter="EXEC_INSTR_DIVERGED" offset="31" />
|
||||
<event name="MaliEngDivergedInstrRate" title="Warp divergence percentage" description="The percentage of instructions that have control flow divergence across the warp." units="percent" equation="((EXEC_INSTR_DIVERGED) / ((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU))) * 100" />
|
||||
<event name="MaliEngFMAInstr" title="Arithmetic FMA pipe instructions" description="The number of instructions issued to the FMA pipe." units="instructions" counter="EXEC_INSTR_FMA" offset="27" />
|
||||
<event name="MaliEngFMAPipeUtil" title="FMA pipe utilization" description="The utilization of the FMA pipes." units="percent" equation="((EXEC_INSTR_FMA) / (2 * (EXEC_CORE_ACTIVE))) * 100" />
|
||||
<event name="MaliEngICacheMiss" title="Instruction cache misses" description="The number of instruction cache misses." units="requests" counter="EXEC_ICACHE_MISS" offset="32" />
|
||||
<event name="MaliEngNarrowInstr" title="Narrow arithmetic instructions" description="The number of narrow arithmetic instructions." units="instructions" counter="EXEC_INSTR_NARROW" offset="5" />
|
||||
<event name="MaliEngNarrowInstrRate" title="Narrow arithmetic percentage" description="The percentage of arithmetic instructions that operate on 8/16-bit types." units="percent" equation="((EXEC_INSTR_NARROW) / ((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU))) * 100" />
|
||||
<event name="MaliEngSFUInstr" title="Arithmetic SFU pipe instructions" description="The number of instructions issued to the SFU pipe." units="instructions" counter="EXEC_INSTR_SFU" offset="29" />
|
||||
<event name="MaliEngSFUPipeUtil" title="SFU pipe utilization" description="The utilization of the SFU pipe." units="percent" equation="(((EXEC_INSTR_SFU) * 4) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngSWBlendInstr" title="Blend shader instructions" description="The number of blend shader invocations run." units="instructions" counter="CALL_BLEND_SHADER" offset="34" />
|
||||
<event name="MaliEngSWBlendRate" title="Shader blend percentage" description="The percentage of fragments that use shader-based blending." units="percent" equation="(((CALL_BLEND_SHADER) * 4) / (FRAG_WARPS)) * 100" />
|
||||
<event name="MaliEngStarveCy" title="Processing unit starvation cycles" description="The number of cycles when the processing unit is starved of work." units="cycles" counter="EXEC_STARVE_ARITH" offset="33" />
|
||||
<event name="MaliFragActiveCy" title="Fragment active cycles" description="The number of cycles when the shader core is processing a fragment workload." units="cycles" counter="FRAG_ACTIVE" offset="4" />
|
||||
<event name="MaliFragEZSKillQd" title="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_KILL" offset="14" />
|
||||
<event name="MaliFragEZSKillRate" title="Early ZS killed quad percentage" description="The percentage of rasterized quads that are killed by early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSTestQd" title="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_TEST" offset="12" />
|
||||
<event name="MaliFragEZSTestRate" title="Early ZS tested quad percentage" description="The percentage of rasterized quads that were subjected to early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSUpdateQd" title="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" counter="FRAG_QUADS_EZS_UPDATE" offset="13" />
|
||||
<event name="MaliFragEZSUpdateRate" title="Early ZS updated quad percentage" description="The percentage of rasterized quads that update the framebuffer during early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_UPDATE) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragFPKActiveCy" title="Fragment pre-pipe buffer active cycles" description="The number of cycles when at least one quad is present in the pre-pipe quad queue." units="cycles" counter="FRAG_FPK_ACTIVE" offset="7" />
|
||||
<event name="MaliFragFPKBUtil" title="Fragment pre-pipe buffer utilization" description="The percentage of cycles when at least one quad is buffered for fragment shading." units="percent" equation="((FRAG_FPK_ACTIVE) / (FRAG_ACTIVE)) * 100" />
|
||||
<event name="MaliFragFPKKillQd" title="FPK HSR killed quads" description="The number of quads that are killed by hidden surface removal." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (((FRAG_WARPS) * (16)) / 4)" />
|
||||
<event name="MaliFragFPKKillRate" title="FPK HSR killed quad percentage" description="The percentage of rasterized quads that are killed by hidden surface removal." units="percent" equation="(((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (((FRAG_WARPS) * (16)) / 4)) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSKillQd" title="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" counter="FRAG_LZS_KILL" offset="16" />
|
||||
<event name="MaliFragLZSKillRate" title="Late ZS killed quad percentage" description="The percentage of rasterized quads that are killed by late depth and stencil testing." units="percent" equation="((FRAG_LZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSTestQd" title="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" counter="FRAG_LZS_TEST" offset="15" />
|
||||
<event name="MaliFragLZSTestRate" title="Late ZS tested quad percentage" description="The percentage of rasterized quads that are tested by late depth and stencil testing." units="percent" equation="((FRAG_LZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragOpaqueQd" title="Occluding quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" counter="QUAD_FPK_KILLER" offset="20" />
|
||||
<event name="MaliFragOpaqueQdRate" title="Occluding quad percentage" description="The percentage of quads that are valid occluders for hidden surface removal." units="percent" equation="((QUAD_FPK_KILLER) / ((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL))) * 100" />
|
||||
<event name="MaliFragOverdraw" title="Fragments per pixel" description="The number of fragments shaded per output pixel." units="threads" equation="(FRAG_SHADER_THREADS) / ((ITER_FRAG_TASK_COMPLETED) * (32) * (32))" />
|
||||
<event name="MaliFragRastCoarseQd" title="Rasterized coarse quads" description="The number of coarse quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_COARSE" offset="68" />
|
||||
<event name="MaliFragRastPartQd" title="Partial rasterized fine quads" description="The number of rasterized fine quads created with partial coverage." units="quads" counter="FRAG_PARTIAL_QUADS_RAST" offset="10" />
|
||||
<event name="MaliFragRastPartQdRate" title="Partial coverage percentage" description="The percentage of rasterized fine quads that contain samples with no coverage." units="percent" equation="((FRAG_PARTIAL_QUADS_RAST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragRastPrim" title="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" counter="FRAG_PRIM_RAST" offset="6" />
|
||||
<event name="MaliFragRastQd" title="Rasterized fine quads" description="The number of fine quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_RAST" offset="11" />
|
||||
<event name="MaliFragShadRate" title="Fragment shading rate" description="The percentage of coarse quads generated relative to fine quads rasterized." units="percent" equation="((FRAG_QUADS_COARSE) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragShadedQd" title="Shaded coarse quads" description="The number of 2x2 fragment quads that are fragment shaded." units="quads" equation="((FRAG_WARPS) * (16)) / 4" />
|
||||
<event name="MaliFragThread" title="Fragment threads" description="The number of fragment threads started." units="threads" counter="FRAG_SHADER_THREADS" offset="69" />
|
||||
<event name="MaliFragThroughputCy" title="Average cycles per fragment thread" description="The average number of shader core cycles per fragment thread." units="cycles" equation="(FRAG_ACTIVE) / (FRAG_SHADER_THREADS)" />
|
||||
<event name="MaliFragTile" title="Tiles" description="The number of tiles processed by the shader core." units="tiles" counter="FRAG_PTILES" offset="18" />
|
||||
<event name="MaliFragTileKill" title="Killed unchanged tiles" description="The number of tiles killed by transaction elimination." units="tiles" counter="FRAG_TRANS_ELIM" offset="19" />
|
||||
<event name="MaliFragTileKillRate" title="Unchanged tile kill percentage" description="The percentage of tiles that are killed by transaction elimination." units="percent" equation="((FRAG_TRANS_ELIM) / (4 * (FRAG_PTILES))) * 100" />
|
||||
<event name="MaliFragTransparentQd" title="Non-occluding quads" description="The number of quads that are not eligible to be a hidden surface removal occluder." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (QUAD_FPK_KILLER)" />
|
||||
<event name="MaliFragUtil" title="Fragment utilization" description="The utilization of the shader core fragment path." units="percent" equation="((FRAG_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliFragWarp" title="Fragment warps" description="The number of fragment warps created." units="warps" counter="FRAG_WARPS" offset="9" />
|
||||
<event name="MaliLSAtomic" title="Load/store unit atomic issues" description="The number of load/store atomic accesses." units="cycles" counter="LS_MEM_ATOMIC" offset="48" />
|
||||
<event name="MaliLSFullRd" title="Load/store unit full read issues" description="The number of full-width load/store cache reads." units="cycles" counter="LS_MEM_READ_FULL" offset="44" />
|
||||
<event name="MaliLSFullWr" title="Load/store unit full write issues" description="The number of full-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_FULL" offset="46" />
|
||||
<event name="MaliLSIssueCy" title="Load/store unit issue cycles" description="The total number of load/store issue cycles." units="cycles" equation="((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)" />
|
||||
<event name="MaliLSPartRd" title="Load/store unit partial read issues" description="The number of partial-width load/store cache reads." units="cycles" counter="LS_MEM_READ_SHORT" offset="45" />
|
||||
<event name="MaliLSPartWr" title="Load/store unit partial write issues" description="The number of partial-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_SHORT" offset="47" />
|
||||
<event name="MaliLSRdCy" title="Load/store unit read issues" description="The total number of load/store read cycles." units="cycles" equation="(LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)" />
|
||||
<event name="MaliLSUtil" title="Load/store unit utilization" description="The percentage utilization of the load/store unit." units="percent" equation="((((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliLSWrCy" title="Load/store unit write issues" description="The total number of load/store write cycles." units="cycles" equation="(LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)" />
|
||||
<event name="MaliNonFragActiveCy" title="Non-fragment active cycles" description="The number of cycles when the shader core is processing some non-fragment workload." units="cycles" counter="COMPUTE_ACTIVE" offset="22" />
|
||||
<event name="MaliNonFragTask" title="Non-fragment core tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" counter="COMPUTE_TASKS" offset="23" />
|
||||
<event name="MaliNonFragThread" title="Non-fragment threads" description="The number of non-fragment threads started." units="threads" equation="(COMPUTE_WARPS) * (16)" />
|
||||
<event name="MaliNonFragThroughputCy" title="Average cycles per non-fragment thread" description="The average number of shader core cycles per non-fragment thread." units="cycles" equation="(COMPUTE_ACTIVE) / ((COMPUTE_WARPS) * (16))" />
|
||||
<event name="MaliNonFragUtil" title="Non-fragment utilization" description="The utilization of the shader core non-fragment path." units="percent" equation="((COMPUTE_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragWarp" title="Non-fragment warps" description="The number of non-fragment warps created." units="warps" counter="COMPUTE_WARPS" offset="24" />
|
||||
<event name="MaliRTUBox" title="Ray tracing box tests" description="The number of acceleration structure bounding boxes tested." units="boxes" counter="RT_RAY_BOX" offset="71" />
|
||||
<event name="MaliRTUBoxBin1" title="Ray tracing box nodes with 1-4 rays" description="The number of acceleration structure box nodes with 1 to 4 active rays in the warp." units="nodes" counter="RT_RAY_BOX_BIN_1_4" offset="76" />
|
||||
<event name="MaliRTUBoxBin13" title="Ray tracing box nodes with 13-16 rays" description="The number of acceleration structure box nodes with 13 to 16 active rays in the warp." units="nodes" counter="RT_RAY_BOX_BIN_13_16" offset="79" />
|
||||
<event name="MaliRTUBoxBin5" title="Ray tracing box nodes with 5-8 rays" description="The number of acceleration structure box nodes with 5 to 8 active rays in the warp." units="nodes" counter="RT_RAY_BOX_BIN_5_8" offset="77" />
|
||||
<event name="MaliRTUBoxBin9" title="Ray tracing box nodes with 9-12 rays" description="The number of acceleration structure box nodes with 9 to 12 active rays in the warp." units="nodes" counter="RT_RAY_BOX_BIN_9_12" offset="78" />
|
||||
<event name="MaliRTUBoxIssueCy" title="Ray tracing box tester issue cycles" description="The number of active issue cycles for the ray tracing box test unit." units="cycles" counter="RT_RAY_BOX_ISSUED" offset="85" />
|
||||
<event name="MaliRTUFirstHitTerm" title="Ray tracing first hit terminations" description="The number of rays that terminate on their first hit." units="rays" counter="RT_TERM_FIRST_HIT" offset="82" />
|
||||
<event name="MaliRTUIssueCy" title="Ray tracing unit issue cycles" description="The number of cycles the ray tracing unit was issuing work." units="cycles" equation="max((RT_RAY_BOX_ISSUED), (RT_RAY_TRI_ISSUED))" />
|
||||
<event name="MaliRTUMiss" title="Ray tracing triangle test misses" description="The number of triangle intersection tests that do not intersect." units="rays" counter="RT_MISS" offset="83" />
|
||||
<event name="MaliRTUNonOpaqueHit" title="Ray tracing non-opaque triangle hits" description="The number of non-opaque triangle hits." units="tests" counter="RT_NON_OPAQUE_HIT" offset="81" />
|
||||
<event name="MaliRTUOpaqueHit" title="Ray tracing opaque triangle hits" description="The number of opaque triangle hits." units="tests" counter="RT_OPAQUE_HIT" offset="80" />
|
||||
<event name="MaliRTURay" title="Ray tracing started rays" description="The number of rays started." units="rays" counter="RT_RAYS_STARTED" offset="84" />
|
||||
<event name="MaliRTUTri" title="Ray tracing triangle nodes tested" description="The number of triangle nodes tested." units="nodes" counter="RT_RAY_TRI" offset="70" />
|
||||
<event name="MaliRTUTriBin1" title="Ray tracing triangle nodes with 1-4 rays" description="The number of triangle nodes with 1 to 4 active rays in the warp." units="nodes" counter="RT_RAY_TRI_BIN_1_4" offset="72" />
|
||||
<event name="MaliRTUTriBin13" title="Ray tracing triangle nodes with 13-16 rays" description="The number of triangle nodes with 13 to 16 active rays in the warp." units="nodes" counter="RT_RAY_TRI_BIN_13_16" offset="75" />
|
||||
<event name="MaliRTUTriBin5" title="Ray tracing triangle nodes with 5-8 rays" description="The number of triangle nodes with 5 to 8 active rays in the warp." units="nodes" counter="RT_RAY_TRI_BIN_5_8" offset="73" />
|
||||
<event name="MaliRTUTriBin9" title="Ray tracing triangle nodes with 9-12 rays" description="The number of triangle nodes with 9 to 12 active rays in the warp." units="nodes" counter="RT_RAY_TRI_BIN_9_12" offset="74" />
|
||||
<event name="MaliRTUTriIssueCy" title="Ray tracing triangle tester issue cycles" description="The number of active issue cycles for the ray tracing triangle test unit." units="cycles" counter="RT_RAY_TRI_ISSUED" offset="86" />
|
||||
<event name="MaliRTUUtil" title="Ray tracing unit utilization" description="The percentage utilization of the ray tracing unit." units="percent" equation="((max((RT_RAY_BOX_ISSUED), (RT_RAY_TRI_ISSUED))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliSCBusFFEExtRdBt" title="Fragment front-end read beats from external memory" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_FTC_EXT" offset="55" />
|
||||
<event name="MaliSCBusFFEExtRdBy" title="Fragment front-end read bytes from external memory" description="The total number of bytes read from the external memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC_EXT) * (16)" />
|
||||
<event name="MaliSCBusFFEL2RdBt" title="Fragment front-end read beats from L2 cache" description="The number of read beats received by the fixed-function fragment front-end." units="beats" counter="BEATS_RD_FTC" offset="54" />
|
||||
<event name="MaliSCBusFFEL2RdBy" title="Fragment front-end read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdBt" title="Load/store unit read beats from external memory" description="The number of read beats received by the load/store unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_LSC_EXT" offset="57" />
|
||||
<event name="MaliSCBusLSExtRdBy" title="Load/store unit read bytes from external memory" description="The total number of bytes read from the external memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC_EXT) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdByPerRd" title="Load/store unit bytes read from external memory per access cycle" description="The average number of bytes read from the external memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC_EXT) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSL2RdBt" title="Load/store unit read beats from L2 cache" description="The number of read beats received by the load/store unit." units="beats" counter="BEATS_RD_LSC" offset="56" />
|
||||
<event name="MaliSCBusLSL2RdBy" title="Load/store unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSL2RdByPerRd" title="Load/store unit bytes read from L2 per access cycle" description="The average number of bytes read from the L2 memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSOtherWrBt" title="Load/store unit other write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of any reason other than write-back." units="beats" counter="BEATS_WR_LSC_OTHER" offset="61" />
|
||||
<event name="MaliSCBusLSWBWrBt" title="Load/store unit write-back write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of write-back." units="beats" counter="BEATS_WR_LSC_WB" offset="63" />
|
||||
<event name="MaliSCBusLSWrBt" title="Load/store unit write beats to L2 memory system" description="The number of write beats sent by the load/store unit." units="beats" equation="(BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)" />
|
||||
<event name="MaliSCBusLSWrBy" title="Load/store unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the load/store unit." units="bytes" equation="((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)" />
|
||||
<event name="MaliSCBusLSWrByPerWr" title="Load/store unit bytes written to L2 per access cycle" description="The average number of bytes written to the L2 memory system by the load/store unit per write cycle." units="bytes" equation="(((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)) / ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT))" />
|
||||
<event name="MaliSCBusOtherL2RdBt" title="Miscellaneous read beats from L2 cache" description="The number of read beats received by a unit that is not specifically identified." units="beats" counter="BEATS_RD_OTHER" offset="60" />
|
||||
<event name="MaliSCBusTexExtRdBt" title="Texture unit read beats from external memory" description="The number of read beats received by the texture unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_TEX_EXT" offset="59" />
|
||||
<event name="MaliSCBusTexExtRdBy" title="Texture unit read bytes from external memory" description="The total number of bytes read from the external memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX_EXT) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdByPerRd" title="Texture unit bytes read from external memory per texture cycle" description="The average number of bytes read from the external memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX_EXT) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTexL2RdBt" title="Texture unit read beats from L2 cache" description="The number of read beats received by the texture unit." units="beats" counter="BEATS_RD_TEX" offset="58" />
|
||||
<event name="MaliSCBusTexL2RdBy" title="Texture unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX) * (16)" />
|
||||
<event name="MaliSCBusTexL2RdByPerRd" title="Texture unit bytes read from L2 per texture cycle" description="The average number of bytes read from the L2 memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTileWrBPerPx" title="Tile unit bytes written to L2 per pixel" description="The average number of bytes written to the L2 memory system by the tile unit per output pixel." units="bytes" equation="((BEATS_WR_TIB) * (16)) / ((ITER_FRAG_TASK_COMPLETED) * (32) * (32))" />
|
||||
<event name="MaliSCBusTileWrBt" title="Tile unit write beats to L2 memory system" description="The number of write beats sent by the tile write-back unit." units="beats" counter="BEATS_WR_TIB" offset="62" />
|
||||
<event name="MaliSCBusTileWrBy" title="Tile unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the tile write-back unit." units="bytes" equation="(BEATS_WR_TIB) * (16)" />
|
||||
<event name="MaliTexCPI" title="Texture filtering cycles per instruction" description="The average number of texture filtering cycles per instruction." units="cycles" equation="((TEX_FILT_NUM_OPERATIONS)) / (((TEX_MSGO_NUM_MSG) * 2) * 4)" />
|
||||
<event name="MaliTexDataFetchStallCy" title="Texture fetch stall cycles" description="The number of cycles when a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles" counter="TEX_TFCH_CLK_STALLED" offset="37" />
|
||||
<event name="MaliTexDescStallCy" title="Texture descriptor stall cycles" description="The number of cycles when a quad is stalled on texture descriptor fetch." units="cycles" counter="TEX_DFCH_CLK_STALLED" offset="36" />
|
||||
<event name="MaliTexFiltFullRate" title="Texture full speed filtering percentage" description="The percentage of texture filtering cycles using the full width of the texture filtering data path." units="percent" equation="(((TEX_FILT_NUM_FXR_OPERATIONS) + (TEX_FILT_NUM_FST_OPERATIONS)) / (TEX_FILT_NUM_OPERATIONS)) * 100" />
|
||||
<event name="MaliTexFiltIssueCy" title="Texture filtering cycles" description="The number of texture filtering issue cycles." units="cycles" counter="TEX_FILT_NUM_OPERATIONS" offset="39" />
|
||||
<event name="MaliTexFiltStallCy" title="Texture filtering stall cycles" description="The number of cycles when the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" offset="38" />
|
||||
<event name="MaliTexFullBiFiltCy" title="Texture filtering cycles using full bilinear" description="The number of cycles when the filtering unit is filled with bilinear filtering." units="cycles" counter="TEX_FILT_NUM_FXR_OPERATIONS" offset="40" />
|
||||
<event name="MaliTexFullTriFiltCy" title="Texture filtering cycles using full trilinear" description="The number of cycles when the filtering unit is filled with trilinear filtering." units="cycles" counter="TEX_FILT_NUM_FST_OPERATIONS" offset="41" />
|
||||
<event name="MaliTexInBt" title="Texture message read beats" description="The number of texture request message data beats." units="beats" counter="TEX_MSGI_NUM_FLITS" offset="35" />
|
||||
<event name="MaliTexInBusUtil" title="Texture input bus utilization" description="The percentage load on the texture message input bus." units="percent" equation="((TEX_MSGI_NUM_FLITS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexIssueCy" title="Texture unit issue cycles" description="The number of cycles the texture unit was busy." units="cycles" equation="(TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliTexOutBt" title="Texture message write beats" description="The number of texture response message data beats." units="beats" counter="TEX_MSGO_NUM_FLITS" offset="43" />
|
||||
<event name="MaliTexOutBusUtil" title="Texture output bus utilization" description="The percentage load on the texture message output bus." units="percent" equation="((TEX_MSGO_NUM_FLITS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexOutMsg" title="Texture messages" description="The number of output texture messages from the texture unit." units="issues" counter="TEX_MSGO_NUM_MSG" offset="42" />
|
||||
<event name="MaliTexQuads" title="Texture quads" description="The number of quad-width texture operations processed by the texture unit." units="quads" equation="(TEX_MSGO_NUM_MSG) * 2" />
|
||||
<event name="MaliTexSample" title="Texture samples" description="The number of texture samples made." units="requests" equation="((TEX_MSGO_NUM_MSG) * 2) * 4" />
|
||||
<event name="MaliTexUtil" title="Texture unit utilization" description="The percentage utilization of the texturing unit." units="percent" equation="(((TEX_FILT_NUM_OPERATIONS)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliVar16IssueCy" title="16-bit interpolation issue cycles" description="The number of 16-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_16) / (4)" />
|
||||
<event name="MaliVar16IssueSlot" title="16-bit interpolation slots" description="The number of 16-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_16" offset="51" />
|
||||
<event name="MaliVar32IssueCy" title="32-bit interpolation issue cycles" description="The number of 32-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_32) / (4)" />
|
||||
<event name="MaliVar32IssueSlot" title="32-bit interpolation slots" description="The number of 32-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_32" offset="50" />
|
||||
<event name="MaliVarInstr" title="Varying unit instructions" description="The number of warp-width interpolation operations processed by the varying unit." units="requests" counter="VARY_INSTR" offset="49" />
|
||||
<event name="MaliVarIssueCy" title="Varying unit issue cycles" description="The total number of varying unit issue cycles." units="cycles" equation="((VARY_SLOT_32) / (4)) + ((VARY_SLOT_16) / (4))" />
|
||||
<event name="MaliVarUtil" title="Varying unit utilization" description="The percentage utilization of the varying unit." units="percent" equation="((((VARY_SLOT_32) / (4)) + ((VARY_SLOT_16) / (4))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
</category>
|
||||
<category name="Tiler">
|
||||
<event name="MaliGeomBackFacePrim" title="Visible back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" counter="BACK_FACING" offset="10" />
|
||||
<event name="MaliGeomFaceCullPrim" title="Facing test culled primitives" description="The number of primitives that are culled by facing tests." units="primitives" counter="PRIM_FACE_CULLED" offset="12" />
|
||||
<event name="MaliGeomFaceCullRate" title="Facing plane test cull percentage" description="The percentage of primitives culled by the facing test." units="percent" equation="((PRIM_FACE_CULLED) / (((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomFrontFacePrim" title="Visible front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" counter="FRONT_FACING" offset="9" />
|
||||
<event name="MaliGeomLinePrim" title="Line primitives" description="The number of input line primitives." units="primitives" counter="LINES" offset="7" />
|
||||
<event name="MaliGeomPlaneCullPrim" title="Frustum test culled primitives" description="The number of primitives that are culled by frustum tests." units="primitives" counter="PRIM_FRUSTUM_CULLED" offset="13" />
|
||||
<event name="MaliGeomPlaneCullRate" title="Frustum test cull percentage" description="The percentage of primitives culled by the frustum test." units="percent" equation="((PRIM_FRUSTUM_CULLED) / ((((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_FACE_CULLED))) * 100" />
|
||||
<event name="MaliGeomPointPrim" title="Point primitives" description="The number of input point primitives." units="primitives" counter="POINTS" offset="8" />
|
||||
<event name="MaliGeomPosShadTask" title="Tiler position shading requests" description="The number of position shading requests in the tiler geometry flow." units="requests" counter="IDVS_POS_SHAD_REQ" offset="21" />
|
||||
<event name="MaliGeomPosShadThread" title="Position shader thread invocations" description="The number of position shader thread invocations." units="threads" equation="(IDVS_POS_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomPosShadThreadPerPrim" title="Position threads per input primitive" description="The number of position shader invocations per input primitive." units="threads" equation="((IDVS_POS_SHAD_REQ) * (4)) / (((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))" />
|
||||
<event name="MaliGeomSampleCullPrim" title="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" counter="PRIM_SAT_CULLED" offset="14" />
|
||||
<event name="MaliGeomSampleCullRate" title="Sample test cull percentage" description="The percentage of primitives culled by the sample coverage test." units="percent" equation="((PRIM_SAT_CULLED) / ((((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_FACE_CULLED) - (PRIM_FRUSTUM_CULLED))) * 100" />
|
||||
<event name="MaliGeomTotalCullPrim" title="Culled primitives" description="The number of primitives that were culled during the rendering process." units="primitives" equation="(PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED)" />
|
||||
<event name="MaliGeomTotalPrim" title="Total input primitives" description="The total number of input primitives to the rendering process." units="primitives" equation="((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomTrianglePrim" title="Triangle primitives" description="The number of input triangle primitives." units="primitives" counter="TRIANGLES" offset="6" />
|
||||
<event name="MaliGeomVarShadTask" title="Tiler varying shading requests" description="The number of varying shading requests in the tiler geometry flow." units="requests" counter="IDVS_VAR_SHAD_REQ" offset="37" />
|
||||
<event name="MaliGeomVarShadThread" title="Varying shader thread invocations" description="The number of varying shader thread invocations." units="threads" equation="(IDVS_VAR_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomVarShadThreadPerPrim" title="Varying threads per input primitive" description="The number of varying shader invocations per visible primitive." units="threads" equation="((IDVS_VAR_SHAD_REQ) * (4)) / (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomVisiblePrim" title="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" counter="PRIM_VISIBLE" offset="11" />
|
||||
<event name="MaliGeomVisibleRate" title="Visible primitive percentage" description="The percentage of primitives that are visible after culling." units="percent" equation="((PRIM_VISIBLE) / (((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliTilerActiveCy" title="Tiler active cycles" description="The number of cycles when the tiler has a workload queued for processing." units="cycles" counter="TILER_ACTIVE" offset="4" />
|
||||
<event name="MaliTilerPosCacheHit" title="Position cache hit requests" description="The number of position lookups that result in a hit in the vertex cache." units="requests" counter="VCACHE_HIT" offset="26" />
|
||||
<event name="MaliTilerPosShadFIFOFullCy" title="Tiler position FIFO full cycles" description="The number of cycles when the tiler has a stalled position shading buffer." units="cycles" counter="IDVS_POS_FIFO_FULL" offset="24" />
|
||||
<event name="MaliTilerPosShadStallCy" title="Tiler position shading stall cycles" description="The number of cycles when the tiler has a stalled position shading request." units="cycles" counter="IDVS_POS_SHAD_STALL" offset="23" />
|
||||
<event name="MaliTilerRdBt" title="Output internal read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" counter="BUS_READ" offset="17" />
|
||||
<event name="MaliTilerUtil" title="Tiler utilization" description="The percentage of GPU active cycles when the tiler has a workload queued for processing." units="percent" equation="((TILER_ACTIVE) / ((GPU_ITER_ACTIVE))) * 100" />
|
||||
<event name="MaliTilerVarCacheHit" title="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" counter="IDVS_VBU_HIT" offset="34" />
|
||||
<event name="MaliTilerVarCacheHitRate" title="Varying cache hit percentage" description="The percentage hit rate of the tiler varying cache." units="percent" equation="((IDVS_VBU_HIT) / ((IDVS_VBU_HIT) + (IDVS_VBU_MISS))) * 100" />
|
||||
<event name="MaliTilerVarCacheMiss" title="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" counter="IDVS_VBU_MISS" offset="35" />
|
||||
<event name="MaliTilerVarShadStallCy" title="Tiler varying shading stall cycles" description="The number of cycles when the tiler has a stalled varying shading request." units="cycles" counter="IDVS_VAR_SHAD_STALL" offset="38" />
|
||||
</category>
|
||||
</metrics>
|
||||
247
src/panfrost/perf/generated/G72.xml
Normal file
247
src/panfrost/perf/generated/G72.xml
Normal file
|
|
@ -0,0 +1,247 @@
|
|||
<!--
|
||||
Copyright (c) 2026 Arm, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
Generated from libGPUCounters @ 4ea7c0127fe2942a00e4a1123bb62c625a401f93.
|
||||
https://github.com/ARM-software/libGPUCounters
|
||||
which is:
|
||||
Copyright (c) 2023-2025 Arm Limited
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
|
||||
<metrics id="G72">
|
||||
<category name="GPU Front-end">
|
||||
<event name="MaliFragQueueActiveCy" title="Fragment queue active cycles" description="The number of cycles when work is queued for processing in the GPU fragment queue." units="cycles" counter="JS0_ACTIVE" offset="10" />
|
||||
<event name="MaliFragQueueJob" title="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" counter="JS0_JOBS" offset="8" />
|
||||
<event name="MaliFragQueueTask" title="Fragment tasks" description="The number of fragment tasks processed." units="tasks" counter="JS0_TASKS" offset="9" />
|
||||
<event name="MaliFragQueueUtil" title="Fragment queue utilization" description="The fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS0_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragQueueWaitDepCy" title="Fragment queue job dependency wait cycles" description="The number of cycles when queued fragment work is waiting for dependent work to complete." units="cycles" counter="JS0_WAIT_DEPEND" offset="14" />
|
||||
<event name="MaliFragQueueWaitFinishCy" title="Fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued fragment work to complete." units="cycles" counter="JS0_WAIT_FINISH" offset="15" />
|
||||
<event name="MaliFragQueueWaitFlushCy" title="Fragment queue cache flush wait cycles" description="The number of cycles when queued fragment work is waiting for a cache flush." units="cycles" counter="JS0_WAIT_FLUSH" offset="11" />
|
||||
<event name="MaliFragQueueWaitIssueCy" title="Fragment queue job issue wait cycles" description="The number of cycles when queued fragment work is waiting for an available processor." units="cycles" counter="JS0_WAIT_ISSUE" offset="13" />
|
||||
<event name="MaliFragQueueWaitRdCy" title="Fragment queue job descriptor read wait cycles" description="The number of cycles when queued fragment work is waiting for a descriptor load." units="cycles" counter="JS0_WAIT_READ" offset="12" />
|
||||
<event name="MaliGPUActiveCy" title="GPU active cycles" description="The number of cycles when the GPU has a workload of any type queued for processing." units="cycles" counter="GPU_ACTIVE" offset="6" />
|
||||
<event name="MaliGPUIRQActiveCy" title="GPU interrupt pending cycles" description="The number of cycles when the GPU has a pending interrupt." units="cycles" counter="IRQ_ACTIVE" offset="7" />
|
||||
<event name="MaliGPUIRQUtil" title="Interrupt pending utilization" description="The IRQ pending utilization compared against the GPU active cycles." units="percent" equation="((IRQ_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueActiveCy" title="Non-fragment queue active cycles" description="The number of cycles when work is queued in the GPU non-fragment queue." units="cycles" counter="JS1_ACTIVE" offset="18" />
|
||||
<event name="MaliNonFragQueueJob" title="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" counter="JS1_JOBS" offset="16" />
|
||||
<event name="MaliNonFragQueueTask" title="Non-fragment tasks" description="The number of non-fragment tasks processed." units="tasks" counter="JS1_TASKS" offset="17" />
|
||||
<event name="MaliNonFragQueueUtil" title="Non-fragment queue utilization" description="The non-fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS1_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueWaitDepCy" title="Non-fragment queue job dependency wait cycles" description="The number of cycles when queued non-fragment work is waiting for dependent work to complete." units="cycles" counter="JS1_WAIT_DEPEND" offset="22" />
|
||||
<event name="MaliNonFragQueueWaitFinishCy" title="Non-fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued non-fragment work to complete." units="cycles" counter="JS1_WAIT_FINISH" offset="23" />
|
||||
<event name="MaliNonFragQueueWaitFlushCy" title="Non-fragment queue cache flush wait cycles" description="The number of cycles when queued non-fragment work is waiting for a cache flush." units="cycles" counter="JS1_WAIT_FLUSH" offset="19" />
|
||||
<event name="MaliNonFragQueueWaitIssueCy" title="Non-fragment queue job issue wait cycles" description="The number of cycles when queued non-fragment work is waiting for an available processor." units="cycles" counter="JS1_WAIT_ISSUE" offset="21" />
|
||||
<event name="MaliNonFragQueueWaitRdCy" title="Non-fragment queue job descriptor read wait cycles" description="The number of cycles when queued non-fragment work is waiting for a descriptor load." units="cycles" counter="JS1_WAIT_READ" offset="20" />
|
||||
<event name="MaliResQueueActiveCy" title="Reserved queue active cycles" description="The number of cycles when work is queued in the GPU reserved queue." units="cycles" counter="JS2_ACTIVE" offset="26" />
|
||||
<event name="MaliResQueueJob" title="Reserved queue jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" counter="JS2_JOBS" offset="24" />
|
||||
<event name="MaliResQueueTask" title="Reserved queue tasks" description="The number of reserved tasks processed." units="tasks" counter="JS2_TASKS" offset="25" />
|
||||
<event name="MaliResQueueWaitDepCy" title="Reserved queue job dependency wait cycles" description="The number of cycles when queued reserved work is waiting for dependent work to complete." units="cycles" counter="JS2_WAIT_DEPEND" offset="30" />
|
||||
<event name="MaliResQueueWaitFinishCy" title="Reserved queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued reserved work to complete." units="cycles" counter="JS2_WAIT_FINISH" offset="31" />
|
||||
<event name="MaliResQueueWaitFlushCy" title="Reserved queue cache flush wait cycles" description="The number of cycles when queued reserved work is waiting for a cache flush." units="cycles" counter="JS2_WAIT_FLUSH" offset="27" />
|
||||
<event name="MaliResQueueWaitIssueCy" title="Reserved queue job issue wait cycles" description="The number of cycles when queued reserved work is waiting for an available processor." units="cycles" counter="JS2_WAIT_ISSUE" offset="29" />
|
||||
<event name="MaliResQueueWaitRdCy" title="Reserved queue job descriptor read wait cycles" description="The number of cycles when queued reserved work is waiting for a descriptor load." units="cycles" counter="JS2_WAIT_READ" offset="28" />
|
||||
</category>
|
||||
<category name="Memory System">
|
||||
<event name="MaliExtBusRd" title="Output external read transactions" description="The number of external read transactions." units="transactions" counter="L2_EXT_READ" offset="29" />
|
||||
<event name="MaliExtBusRdBt" title="Output external read beats" description="The number of external bus data read cycles." units="beats" counter="L2_EXT_READ_BEATS" offset="32" />
|
||||
<event name="MaliExtBusRdBy" title="Output external read bytes" description="The total output read bandwidth for the GPU." units="bytes" equation="(L2_EXT_READ_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusRdLat0" title="Output external read latency 0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" counter="L2_EXT_RRESP_0_127" offset="37" />
|
||||
<event name="MaliExtBusRdLat128" title="Output external read latency 128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" counter="L2_EXT_RRESP_128_191" offset="38" />
|
||||
<event name="MaliExtBusRdLat192" title="Output external read latency 192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" counter="L2_EXT_RRESP_192_255" offset="39" />
|
||||
<event name="MaliExtBusRdLat256" title="Output external read latency 256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" counter="L2_EXT_RRESP_256_319" offset="40" />
|
||||
<event name="MaliExtBusRdLat320" title="Output external read latency 320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" counter="L2_EXT_RRESP_320_383" offset="41" />
|
||||
<event name="MaliExtBusRdLat384" title="Output external read latency 384+ cycles" description="The number of read beats that are returned at least 384 cycles after the transaction started." units="beats" equation="(L2_EXT_READ_BEATS) - (L2_EXT_RRESP_0_127) - (L2_EXT_RRESP_128_191) - (L2_EXT_RRESP_192_255) - (L2_EXT_RRESP_256_319) - (L2_EXT_RRESP_320_383)" />
|
||||
<event name="MaliExtBusRdNoSnoop" title="Output external ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" counter="L2_EXT_READ_NOSNP" offset="30" />
|
||||
<event name="MaliExtBusRdOTQ1" title="Output external outstanding reads 0-25%" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q1" offset="34" />
|
||||
<event name="MaliExtBusRdOTQ2" title="Output external outstanding reads 25-50%" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q2" offset="35" />
|
||||
<event name="MaliExtBusRdOTQ3" title="Output external outstanding reads 50-75%" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q3" offset="36" />
|
||||
<event name="MaliExtBusRdOTQ4" title="Output external outstanding reads 75-100%" description="The number of read transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_READ) - (L2_EXT_AR_CNT_Q1) - (L2_EXT_AR_CNT_Q2) - (L2_EXT_AR_CNT_Q3)" />
|
||||
<event name="MaliExtBusRdStallCy" title="Output external read stall cycles" description="The number of cycles when a read is stalled waiting for the external bus." units="cycles" counter="L2_EXT_AR_STALL" offset="33" />
|
||||
<event name="MaliExtBusRdStallRate" title="Output external read stall percentage" description="The percentage of cycles with an external read transaction stalled." units="percent" equation="((L2_EXT_AR_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliExtBusRdUnique" title="Output external ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" counter="L2_EXT_READ_UNIQUE" offset="31" />
|
||||
<event name="MaliExtBusWr" title="Output external write transactions" description="The number of external write transactions." units="transactions" counter="L2_EXT_WRITE" offset="42" />
|
||||
<event name="MaliExtBusWrBt" title="Output external write beats" description="The number of external bus data write cycles." units="beats" counter="L2_EXT_WRITE_BEATS" offset="47" />
|
||||
<event name="MaliExtBusWrBy" title="Output external write bytes" description="The total output write bandwidth for the GPU." units="bytes" equation="(L2_EXT_WRITE_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusWrNoSnoopFull" title="Output external WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_FULL" offset="43" />
|
||||
<event name="MaliExtBusWrNoSnoopPart" title="Output external WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_PTL" offset="44" />
|
||||
<event name="MaliExtBusWrOTQ1" title="Output external outstanding writes 0-25%" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q1" offset="49" />
|
||||
<event name="MaliExtBusWrOTQ2" title="Output external outstanding writes 25-50%" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q2" offset="50" />
|
||||
<event name="MaliExtBusWrOTQ3" title="Output external outstanding writes 50-75%" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q3" offset="51" />
|
||||
<event name="MaliExtBusWrOTQ4" title="Output external outstanding writes 75-100%" description="The number of write transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_WRITE) - (L2_EXT_AW_CNT_Q1) - (L2_EXT_AW_CNT_Q2) - (L2_EXT_AW_CNT_Q3)" />
|
||||
<event name="MaliExtBusWrSnoopFull" title="Output external WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_FULL" offset="45" />
|
||||
<event name="MaliExtBusWrSnoopPart" title="Output external WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_PTL" offset="46" />
|
||||
<event name="MaliExtBusWrStallCy" title="Output external write stall cycles" description="The number of cycles when a write is stalled waiting for the external bus." units="cycles" counter="L2_EXT_W_STALL" offset="48" />
|
||||
<event name="MaliExtBusWrStallRate" title="Output external write stall percentage" description="The percentage of cycles with an external write transaction stalled." units="percent" equation="((L2_EXT_W_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliL2CacheFlush" title="L2 cache flush requests" description="The number of GPU L2 cache flushes performed." units="requests" counter="CACHE_FLUSH" offset="63" />
|
||||
<event name="MaliL2CacheIncSnp" title="Input external snoop transactions" description="The number of coherency snoops triggered by external requesters." units="transactions" counter="L2_EXT_SNOOP" offset="52" />
|
||||
<event name="MaliL2CacheIncSnpStallCy" title="Input external snoop stall cycles" description="The number of cycles when a coherency snoop triggered by an external requester is stalled." units="cycles" counter="L2_EXT_SNOOP_STALL" offset="53" />
|
||||
<event name="MaliL2CacheL1Rd" title="Output internal read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal requester." units="requests" counter="L2_RD_MSG_OUT" offset="22" />
|
||||
<event name="MaliL2CacheL1RdStallCy" title="Output internal read stall cycles" description="The number of cycles when L1 cache read requests sent by the L2 cache to an internal requester are stalled." units="cycles" counter="L2_RD_MSG_OUT_STALL" offset="23" />
|
||||
<event name="MaliL2CacheL1Wr" title="Output internal write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal requester." units="requests" counter="L2_WR_MSG_OUT" offset="24" />
|
||||
<event name="MaliL2CacheLookup" title="Any lookup requests" description="The number of L2 cache lookups performed." units="requests" counter="L2_ANY_LOOKUP" offset="25" />
|
||||
<event name="MaliL2CacheRd" title="Input internal read requests" description="The number of L2 cache read requests from internal requesters." units="requests" counter="L2_RD_MSG_IN" offset="16" />
|
||||
<event name="MaliL2CacheRdLookup" title="Read lookup requests" description="The number of L2 cache read lookups performed." units="requests" counter="L2_READ_LOOKUP" offset="26" />
|
||||
<event name="MaliL2CacheRdMissRate" title="L2 cache read miss percentage" description="The percentage of internal L2 cache reads that result in an external read." units="percent" equation="((L2_EXT_READ) / (L2_READ_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheRdStallCy" title="Input internal read stall cycles" description="The number of cycles when L2 cache read requests from internal requesters are stalled." units="cycles" counter="L2_RD_MSG_IN_STALL" offset="17" />
|
||||
<event name="MaliL2CacheSnp" title="Input internal snoop requests" description="The number of L2 snoop requests from internal requesters." units="requests" counter="L2_SNP_MSG_IN" offset="20" />
|
||||
<event name="MaliL2CacheSnpLookup" title="Input external snoop lookup requests" description="The number of coherency snoop lookups performed that were triggered by an external requester." units="requests" counter="L2_EXT_SNOOP_LOOKUP" offset="28" />
|
||||
<event name="MaliL2CacheSnpStallCy" title="Input internal snoop stall cycles" description="The number of cycles when L2 cache snoop requests from internal requesters are stalled." units="cycles" counter="L2_SNP_MSG_IN_STALL" offset="21" />
|
||||
<event name="MaliL2CacheWr" title="Input internal write requests" description="The number of L2 cache write requests from internal requesters." units="requests" counter="L2_WR_MSG_IN" offset="18" />
|
||||
<event name="MaliL2CacheWrLookup" title="Write lookup requests" description="The number of L2 cache write lookups performed." units="requests" counter="L2_WRITE_LOOKUP" offset="27" />
|
||||
<event name="MaliL2CacheWrMissRate" title="L2 cache write miss percentage" description="The percentage of internal L2 cache writes that result in an external write." units="percent" equation="((L2_EXT_WRITE) / (L2_WRITE_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheWrStallCy" title="Input internal write stall cycles" description="The number of cycles when L2 cache write requests from internal requesters are stalled." units="cycles" counter="L2_WR_MSG_IN_STALL" offset="19" />
|
||||
<event name="MaliMMUL2Hit" title="MMU L2 lookup TLB hits" description="The number of level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L2" offset="8" />
|
||||
<event name="MaliMMUL2Rd" title="MMU L2 table read requests" description="The number of level 2 translation table reads." units="requests" counter="MMU_TABLE_READS_L2" offset="6" />
|
||||
<event name="MaliMMUL3Hit" title="MMU L3 lookup TLB hits" description="The number of level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L3" offset="7" />
|
||||
<event name="MaliMMUL3Rd" title="MMU L3 table read requests" description="The number of level 3 translation table reads." units="requests" counter="MMU_TABLE_READS_L3" offset="5" />
|
||||
<event name="MaliMMULookup" title="MMU lookup requests" description="The number of main MMU address translations performed." units="requests" counter="MMU_REQUESTS" offset="4" />
|
||||
<event name="MaliMMUS2L2Hit" title="MMU stage 2 L2 lookup TLB hits" description="The number of stage 2 level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L2" offset="13" />
|
||||
<event name="MaliMMUS2L2Rd" title="MMU stage 2 L2 lookup requests" description="The number of stage 2 level 2 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L2" offset="11" />
|
||||
<event name="MaliMMUS2L3Hit" title="MMU stage 2 L3 lookup TLB hits" description="The number of stage 2 level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L3" offset="12" />
|
||||
<event name="MaliMMUS2L3Rd" title="MMU stage 2 L3 lookup requests" description="The number of stage 2 level 3 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L3" offset="10" />
|
||||
<event name="MaliMMUS2Lookup" title="MMU stage 2 lookup requests" description="The number of main MMU stage 2 address translations performed." units="requests" counter="MMU_S2_REQUESTS" offset="9" />
|
||||
</category>
|
||||
<category name="Shader Core">
|
||||
<event name="MaliALUIssueCy" title="Arithmetic unit issue cycles" description="The number of cycles the arithmetic unit was busy." units="cycles" equation="(EXEC_INSTR_COUNT)" />
|
||||
<event name="MaliALUUtil" title="Arithmetic unit utilization" description="The percentage utilization of the arithmetic unit." units="percent" equation="(((EXEC_INSTR_COUNT)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliAttrInstr" title="Attribute instructions" description="The number of instructions run by the attribute unit." units="instructions" counter="ATTR_INSTR" offset="52" />
|
||||
<event name="MaliCoreActiveCy" title="Execution core active cycles" description="The number of cycles when the shader core is processing at least one warp." units="cycles" counter="EXEC_CORE_ACTIVE" offset="26" />
|
||||
<event name="MaliCoreUtil" title="Execution core utilization" description="The utilization of the programmable shader core." units="percent" equation="((EXEC_CORE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliEngDivergedInstr" title="Diverged instructions" description="The number of instructions run per warp that have control flow divergence." units="instructions" counter="EXEC_INSTR_DIVERGED" offset="29" />
|
||||
<event name="MaliEngDivergedInstrRate" title="Warp divergence percentage" description="The percentage of instructions that have control flow divergence across the warp." units="percent" equation="((EXEC_INSTR_DIVERGED) / (EXEC_INSTR_COUNT)) * 100" />
|
||||
<event name="MaliEngInstr" title="Arithmetic instruction issue cycles" description="The number of instructions run per warp." units="instructions" counter="EXEC_INSTR_COUNT" offset="28" />
|
||||
<event name="MaliEngStarveCy" title="Execution engine starvation cycles" description="The number of cycles when no new threads are available to run." units="cycles" counter="EXEC_INSTR_STARVING" offset="30" />
|
||||
<event name="MaliFragActiveCy" title="Fragment active cycles" description="The number of cycles when the shader core is processing a fragment workload." units="cycles" counter="FRAG_ACTIVE" offset="4" />
|
||||
<event name="MaliFragEZSKillQd" title="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_KILL" offset="14" />
|
||||
<event name="MaliFragEZSKillRate" title="Early ZS killed quad percentage" description="The percentage of rasterized quads that are killed by early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSTestQd" title="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_TEST" offset="12" />
|
||||
<event name="MaliFragEZSTestRate" title="Early ZS tested quad percentage" description="The percentage of rasterized quads that were subjected to early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSUpdateQd" title="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" counter="FRAG_QUADS_EZS_UPDATE" offset="13" />
|
||||
<event name="MaliFragEZSUpdateRate" title="Early ZS updated quad percentage" description="The percentage of rasterized quads that update the framebuffer during early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_UPDATE) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragFPKActiveCy" title="Fragment pre-pipe buffer active cycles" description="The number of cycles when at least one quad is present in the pre-pipe quad queue." units="cycles" counter="FRAG_FPK_ACTIVE" offset="7" />
|
||||
<event name="MaliFragFPKBUtil" title="Fragment pre-pipe buffer utilization" description="The percentage of cycles when at least one quad is buffered for fragment shading." units="percent" equation="((FRAG_FPK_ACTIVE) / (FRAG_ACTIVE)) * 100" />
|
||||
<event name="MaliFragFPKKillQd" title="FPK HSR killed quads" description="The number of quads that are killed by hidden surface removal." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - ((FRAG_WARPS))" />
|
||||
<event name="MaliFragFPKKillRate" title="FPK HSR killed quad percentage" description="The percentage of rasterized quads that are killed by hidden surface removal." units="percent" equation="(((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - ((FRAG_WARPS))) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSKillQd" title="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" counter="FRAG_LZS_KILL" offset="16" />
|
||||
<event name="MaliFragLZSKillRate" title="Late ZS killed quad percentage" description="The percentage of rasterized quads that are killed by late depth and stencil testing." units="percent" equation="((FRAG_LZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSTestQd" title="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" counter="FRAG_LZS_TEST" offset="15" />
|
||||
<event name="MaliFragLZSTestRate" title="Late ZS tested quad percentage" description="The percentage of rasterized quads that are tested by late depth and stencil testing." units="percent" equation="((FRAG_LZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragOpaqueQd" title="Occluding quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" counter="QUAD_FPK_KILLER" offset="20" />
|
||||
<event name="MaliFragOpaqueQdRate" title="Occluding quad percentage" description="The percentage of quads that are valid occluders for hidden surface removal." units="percent" equation="((QUAD_FPK_KILLER) / ((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL))) * 100" />
|
||||
<event name="MaliFragOverdraw" title="Fragments per pixel" description="The number of fragments shaded per output pixel." units="threads" equation="((FRAG_WARPS) * (4)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliFragPartWarp" title="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps" counter="FRAG_PARTIAL_WARPS" offset="10" />
|
||||
<event name="MaliFragPartWarpRate" title="Partial coverage percentage" description="The percentage of warps that contain samples with no coverage." units="percent" equation="((FRAG_PARTIAL_WARPS) / (FRAG_WARPS)) * 100" />
|
||||
<event name="MaliFragRastPrim" title="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" counter="FRAG_PRIM_RAST" offset="6" />
|
||||
<event name="MaliFragRastQd" title="Rasterized fine quads" description="The number of fine quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_RAST" offset="11" />
|
||||
<event name="MaliFragRdPrim" title="Fragment primitives loaded" description="The number of primitives loaded from the tile list by the fragment front-end." units="primitives" counter="FRAG_PRIMITIVES" offset="5" />
|
||||
<event name="MaliFragShadedQd" title="Shaded coarse quads" description="The number of 2x2 fragment quads that are fragment shaded." units="quads" equation="(FRAG_WARPS)" />
|
||||
<event name="MaliFragThread" title="Fragment threads" description="The number of fragment threads started." units="threads" equation="(FRAG_WARPS) * (4)" />
|
||||
<event name="MaliFragThroughputCy" title="Average cycles per fragment thread" description="The average number of shader core cycles per fragment thread." units="cycles" equation="(FRAG_ACTIVE) / ((FRAG_WARPS) * (4))" />
|
||||
<event name="MaliFragTile" title="Tiles" description="The number of tiles processed by the shader core." units="tiles" counter="FRAG_PTILES" offset="18" />
|
||||
<event name="MaliFragTileKill" title="Killed unchanged tiles" description="The number of tiles killed by transaction elimination." units="tiles" counter="FRAG_TRANS_ELIM" offset="19" />
|
||||
<event name="MaliFragTileKillRate" title="Unchanged tile kill percentage" description="The percentage of tiles that are killed by transaction elimination." units="percent" equation="((FRAG_TRANS_ELIM) / (FRAG_PTILES)) * 100" />
|
||||
<event name="MaliFragTransparentQd" title="Non-occluding quads" description="The number of quads that are not eligible to be a hidden surface removal occluder." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (QUAD_FPK_KILLER)" />
|
||||
<event name="MaliFragUtil" title="Fragment utilization" description="The utilization of the shader core fragment path." units="percent" equation="((FRAG_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragWarp" title="Fragment warps" description="The number of fragment warps created." units="warps" counter="FRAG_WARPS" offset="9" />
|
||||
<event name="MaliLSAtomic" title="Load/store unit atomic issues" description="The number of load/store atomic accesses." units="cycles" counter="LS_MEM_ATOMIC" offset="48" />
|
||||
<event name="MaliLSFullRd" title="Load/store unit full read issues" description="The number of full-width load/store cache reads." units="cycles" counter="LS_MEM_READ_FULL" offset="44" />
|
||||
<event name="MaliLSFullWr" title="Load/store unit full write issues" description="The number of full-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_FULL" offset="46" />
|
||||
<event name="MaliLSIssueCy" title="Load/store unit issue cycles" description="The total number of load/store issue cycles." units="cycles" equation="((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)" />
|
||||
<event name="MaliLSPartRd" title="Load/store unit partial read issues" description="The number of partial-width load/store cache reads." units="cycles" counter="LS_MEM_READ_SHORT" offset="45" />
|
||||
<event name="MaliLSPartWr" title="Load/store unit partial write issues" description="The number of partial-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_SHORT" offset="47" />
|
||||
<event name="MaliLSRdCy" title="Load/store unit read issues" description="The total number of load/store read cycles." units="cycles" equation="(LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)" />
|
||||
<event name="MaliLSUtil" title="Load/store unit utilization" description="The percentage utilization of the load/store unit." units="percent" equation="((((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliLSWrCy" title="Load/store unit write issues" description="The total number of load/store write cycles." units="cycles" equation="(LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)" />
|
||||
<event name="MaliNonFragActiveCy" title="Non-fragment active cycles" description="The number of cycles when the shader core is processing some non-fragment workload." units="cycles" counter="COMPUTE_ACTIVE" offset="22" />
|
||||
<event name="MaliNonFragTask" title="Non-fragment core tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" counter="COMPUTE_TASKS" offset="23" />
|
||||
<event name="MaliNonFragThread" title="Non-fragment threads" description="The number of non-fragment threads started." units="threads" equation="(COMPUTE_WARPS) * (4)" />
|
||||
<event name="MaliNonFragThroughputCy" title="Average cycles per non-fragment thread" description="The average number of shader core cycles per non-fragment thread." units="cycles" equation="(COMPUTE_ACTIVE) / ((COMPUTE_WARPS) * (4))" />
|
||||
<event name="MaliNonFragUtil" title="Non-fragment utilization" description="The utilization of the shader core non-fragment path." units="percent" equation="((COMPUTE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragWarp" title="Non-fragment warps" description="The number of non-fragment warps created." units="warps" counter="COMPUTE_WARPS" offset="24" />
|
||||
<event name="MaliSCBusFFEExtRdBt" title="Fragment front-end read beats from external memory" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_FTC_EXT" offset="55" />
|
||||
<event name="MaliSCBusFFEExtRdBy" title="Fragment front-end read bytes from external memory" description="The total number of bytes read from the external memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC_EXT) * (16)" />
|
||||
<event name="MaliSCBusFFEL2RdBt" title="Fragment front-end read beats from L2 cache" description="The number of read beats received by the fixed-function fragment front-end." units="beats" counter="BEATS_RD_FTC" offset="54" />
|
||||
<event name="MaliSCBusFFEL2RdBy" title="Fragment front-end read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdBt" title="Load/store unit read beats from external memory" description="The number of read beats received by the load/store unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_LSC_EXT" offset="57" />
|
||||
<event name="MaliSCBusLSExtRdBy" title="Load/store unit read bytes from external memory" description="The total number of bytes read from the external memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC_EXT) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdByPerRd" title="Load/store unit bytes read from external memory per access cycle" description="The average number of bytes read from the external memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC_EXT) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSL2RdBt" title="Load/store unit read beats from L2 cache" description="The number of read beats received by the load/store unit." units="beats" counter="BEATS_RD_LSC" offset="56" />
|
||||
<event name="MaliSCBusLSL2RdBy" title="Load/store unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSL2RdByPerRd" title="Load/store unit bytes read from L2 per access cycle" description="The average number of bytes read from the L2 memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSWrBt" title="Load/store unit write beats to L2 memory system" description="The number of write beats sent by the load/store unit." units="beats" counter="BEATS_WR_LSC" offset="61" />
|
||||
<event name="MaliSCBusLSWrBy" title="Load/store unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_WR_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSWrByPerWr" title="Load/store unit bytes written to L2 per access cycle" description="The average number of bytes written to the L2 memory system by the load/store unit per write cycle." units="bytes" equation="((BEATS_WR_LSC) * (16)) / ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT))" />
|
||||
<event name="MaliSCBusOtherL2RdBt" title="Miscellaneous read beats from L2 cache" description="The number of read beats received by a unit that is not specifically identified." units="beats" counter="BEATS_RD_OTHER" offset="60" />
|
||||
<event name="MaliSCBusOtherWrBt" title="Miscellaneous write beats to L2 memory system" description="The number of write beats sent by any unit that is not specifically identified." units="beats" counter="BEATS_WR_OTHER" offset="63" />
|
||||
<event name="MaliSCBusOtherWrBy" title="Other unit write bytes to L2 memory system" description="The number of write bytes sent by any unit that is not specifically identified." units="bytes" equation="(BEATS_WR_OTHER) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdBt" title="Texture unit read beats from external memory" description="The number of read beats received by the texture unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_TEX_EXT" offset="59" />
|
||||
<event name="MaliSCBusTexExtRdBy" title="Texture unit read bytes from external memory" description="The total number of bytes read from the external memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX_EXT) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdByPerRd" title="Texture unit bytes read from external memory per texture cycle" description="The average number of bytes read from the external memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX_EXT) * (16)) / (TEX_COORD_ISSUE)" />
|
||||
<event name="MaliSCBusTexL2RdBt" title="Texture unit read beats from L2 cache" description="The number of read beats received by the texture unit." units="beats" counter="BEATS_RD_TEX" offset="58" />
|
||||
<event name="MaliSCBusTexL2RdBy" title="Texture unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX) * (16)" />
|
||||
<event name="MaliSCBusTexL2RdByPerRd" title="Texture unit bytes read from L2 per texture cycle" description="The average number of bytes read from the L2 memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX) * (16)) / (TEX_COORD_ISSUE)" />
|
||||
<event name="MaliSCBusTileWrBPerPx" title="Tile unit bytes written to L2 per pixel" description="The average number of bytes written to the L2 memory system by the tile unit per output pixel." units="bytes" equation="((BEATS_WR_TIB) * (16)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliSCBusTileWrBt" title="Tile unit write beats to L2 memory system" description="The number of write beats sent by the tile write-back unit." units="beats" counter="BEATS_WR_TIB" offset="62" />
|
||||
<event name="MaliSCBusTileWrBy" title="Tile unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the tile write-back unit." units="bytes" equation="(BEATS_WR_TIB) * (16)" />
|
||||
<event name="MaliTex3DInstr" title="3D texture instructions" description="The number of texture operations acting on a 3D texture." units="requests" counter="TEX_INSTR_3D" offset="38" />
|
||||
<event name="MaliTex3DInstrRate" title="Texture samples using 3D texture percentage" description="The percentage of texture operations accessing 3D textures." units="percent" equation="((TEX_INSTR_3D) / (TEX_INSTR)) * 100" />
|
||||
<event name="MaliTexCPI" title="Texture filtering cycles per instruction" description="The average number of texture filtering cycles per instruction." units="cycles" equation="((TEX_COORD_ISSUE)) / ((TEX_INSTR))" />
|
||||
<event name="MaliTexCompressInstr" title="Compressed texture instructions" description="The number of texture operations acting on a compressed texture." units="requests" counter="TEX_INSTR_COMPRESSED" offset="37" />
|
||||
<event name="MaliTexCompressInstrRate" title="Texture samples using compressed texture percentage" description="The percentage of texture operations accessing compressed textures." units="percent" equation="((TEX_INSTR_COMPRESSED) / (TEX_INSTR)) * 100" />
|
||||
<event name="MaliTexCoordStallCy" title="Texture filtering coordinate stall cycles" description="The number of cycles when threads are stalled at the texel coordinate calculation stage." units="cycles" counter="TEX_COORD_STALL" offset="41" />
|
||||
<event name="MaliTexDataStallCy" title="Texture line fill stall cycles" description="The number of cycles when at least one thread is waiting for data from the texture cache, but no lookup is completed." units="cycles" counter="TEX_STARVE_CACHE" offset="42" />
|
||||
<event name="MaliTexFiltIssueCy" title="Texture filtering cycles" description="The number of texture filtering issue cycles." units="cycles" counter="TEX_COORD_ISSUE" offset="40" />
|
||||
<event name="MaliTexInstr" title="Texture instructions" description="The number of thread-width texture operations processed." units="requests" counter="TEX_INSTR" offset="35" />
|
||||
<event name="MaliTexIssueCy" title="Texture unit issue cycles" description="The number of cycles the texture unit was busy." units="cycles" equation="(TEX_COORD_ISSUE)" />
|
||||
<event name="MaliTexMipInstr" title="Mipmapped texture instructions" description="The number of texture operations that act on a mipmapped texture." units="requests" counter="TEX_INSTR_MIPMAP" offset="36" />
|
||||
<event name="MaliTexMipInstrRate" title="Texture accesses using mipmapping percentage" description="The percentage of texture operations accessing mipmapped textures." units="percent" equation="((TEX_INSTR_MIPMAP) / (TEX_INSTR)) * 100" />
|
||||
<event name="MaliTexPartDataStallCy" title="Texture filtering partial data stall cycles" description="The number of cycles when at least one thread fetched some data from the texture cache, but no filtering operation is started." units="cycles" counter="TEX_STARVE_FILTER" offset="43" />
|
||||
<event name="MaliTexSample" title="Texture samples" description="The number of texture samples made." units="requests" equation="(TEX_INSTR)" />
|
||||
<event name="MaliTexTriInstr" title="Trilinear filtered texture instructions" description="The number of texture operations using a trilinear texture filter." units="requests" counter="TEX_INSTR_TRILINEAR" offset="39" />
|
||||
<event name="MaliTexTriInstrRate" title="Texture accesses using trilinear filter percentage" description="The percentage of texture operations using trilinear filtering." units="percent" equation="((TEX_INSTR_TRILINEAR) / (TEX_INSTR)) * 100" />
|
||||
<event name="MaliTexUtil" title="Texture unit utilization" description="The percentage utilization of the texturing unit." units="percent" equation="(((TEX_COORD_ISSUE)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliVar16IssueCy" title="16-bit interpolation issue cycles" description="The number of 16-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_16) / (1)" />
|
||||
<event name="MaliVar16IssueSlot" title="16-bit interpolation slots" description="The number of 16-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_16" offset="51" />
|
||||
<event name="MaliVar32IssueCy" title="32-bit interpolation issue cycles" description="The number of 32-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_32) / (1)" />
|
||||
<event name="MaliVar32IssueSlot" title="32-bit interpolation slots" description="The number of 32-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_32" offset="50" />
|
||||
<event name="MaliVarInstr" title="Varying unit instructions" description="The number of warp-width interpolation operations processed by the varying unit." units="requests" counter="VARY_INSTR" offset="49" />
|
||||
<event name="MaliVarIssueCy" title="Varying unit issue cycles" description="The total number of varying unit issue cycles." units="cycles" equation="((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))" />
|
||||
<event name="MaliVarUtil" title="Varying unit utilization" description="The percentage utilization of the varying unit." units="percent" equation="((((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
</category>
|
||||
<category name="Tiler">
|
||||
<event name="MaliGeomBackFacePrim" title="Visible back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" counter="BACK_FACING" offset="10" />
|
||||
<event name="MaliGeomFaceXYPlaneCullPrim" title="Facing or XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" counter="PRIM_CULLED" offset="12" />
|
||||
<event name="MaliGeomFaceXYPlaneCullRate" title="Facing or XY plane test cull percentage" description="The percentage of primitives culled by the facing or frustum XY plane tests." units="percent" equation="((PRIM_CULLED) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomFrontFacePrim" title="Visible front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" counter="FRONT_FACING" offset="9" />
|
||||
<event name="MaliGeomLinePrim" title="Line primitives" description="The number of input line primitives." units="primitives" counter="LINES" offset="7" />
|
||||
<event name="MaliGeomPointPrim" title="Point primitives" description="The number of input point primitives." units="primitives" counter="POINTS" offset="8" />
|
||||
<event name="MaliGeomPosShadTask" title="Tiler position shading requests" description="The number of position shading requests in the tiler geometry flow." units="requests" counter="IDVS_POS_SHAD_REQ" offset="21" />
|
||||
<event name="MaliGeomPosShadThread" title="Position shader thread invocations" description="The number of position shader thread invocations." units="threads" equation="(IDVS_POS_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomPosShadThreadPerPrim" title="Position threads per input primitive" description="The number of position shader invocations per input primitive." units="threads" equation="((IDVS_POS_SHAD_REQ) * (4)) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))" />
|
||||
<event name="MaliGeomSampleCullPrim" title="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" counter="PRIM_SAT_CULLED" offset="14" />
|
||||
<event name="MaliGeomSampleCullRate" title="Sample test cull percentage" description="The percentage of primitives culled by the sample coverage test." units="percent" equation="((PRIM_SAT_CULLED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED) - (PRIM_CLIPPED))) * 100" />
|
||||
<event name="MaliGeomTotalCullPrim" title="Culled primitives" description="The number of primitives that were culled during the rendering process." units="primitives" equation="(PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)" />
|
||||
<event name="MaliGeomTotalPrim" title="Total input primitives" description="The total number of input primitives to the rendering process." units="primitives" equation="((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomTrianglePrim" title="Triangle primitives" description="The number of input triangle primitives." units="primitives" counter="TRIANGLES" offset="6" />
|
||||
<event name="MaliGeomVarShadTask" title="Tiler varying shading requests" description="The number of varying shading requests in the tiler geometry flow." units="requests" counter="IDVS_VAR_SHAD_REQ" offset="37" />
|
||||
<event name="MaliGeomVarShadThread" title="Varying shader thread invocations" description="The number of varying shader thread invocations." units="threads" equation="(IDVS_VAR_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomVarShadThreadPerPrim" title="Varying threads per visible primitive" description="The number of varying shader invocations per visible primitive." units="threads" equation="((IDVS_VAR_SHAD_REQ) * (4)) / (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomVisiblePrim" title="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" counter="PRIM_VISIBLE" offset="11" />
|
||||
<event name="MaliGeomVisibleRate" title="Visible primitive percentage" description="The percentage of primitives that are visible after culling." units="percent" equation="((PRIM_VISIBLE) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomZPlaneCullPrim" title="Z plane culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" counter="PRIM_CLIPPED" offset="13" />
|
||||
<event name="MaliGeomZPlaneCullRate" title="Z plane test cull percentage" description="The percentage of primitives culled by the frustum Z plane test." units="percent" equation="((PRIM_CLIPPED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED))) * 100" />
|
||||
<event name="MaliTilerActiveCy" title="Tiler active cycles" description="The number of cycles when the tiler has a workload queued for processing." units="cycles" counter="TILER_ACTIVE" offset="4" />
|
||||
<event name="MaliTilerPosCacheHit" title="Position cache hit requests" description="The number of position lookups that result in a hit in the vertex cache." units="requests" counter="VCACHE_HIT" offset="26" />
|
||||
<event name="MaliTilerPosCacheHitRate" title="Position cache hit percentage" description="The percentage hit rate of the tiler position cache." units="percent" equation="((VCACHE_HIT) / ((VCACHE_HIT) + (VCACHE_MISS))) * 100" />
|
||||
<event name="MaliTilerPosCacheMiss" title="Position cache miss requests" description="The number of position lookups that miss in the vertex cache." units="requests" counter="VCACHE_MISS" offset="27" />
|
||||
<event name="MaliTilerPosShadFIFOFullCy" title="Tiler position FIFO full cycles" description="The number of cycles when the tiler has a stalled position shading buffer." units="cycles" counter="IDVS_POS_FIFO_FULL" offset="24" />
|
||||
<event name="MaliTilerPosShadStallCy" title="Tiler position shading stall cycles" description="The number of cycles when the tiler has a stalled position shading request." units="cycles" counter="IDVS_POS_SHAD_STALL" offset="23" />
|
||||
<event name="MaliTilerRdBt" title="Output internal read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" counter="BUS_READ" offset="17" />
|
||||
<event name="MaliTilerUtil" title="Tiler utilization" description="The percentage of GPU active cycles when the tiler has a workload queued for processing." units="percent" equation="((TILER_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliTilerVarCacheHit" title="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" counter="IDVS_VBU_HIT" offset="34" />
|
||||
<event name="MaliTilerVarCacheHitRate" title="Varying cache hit percentage" description="The percentage hit rate of the tiler varying cache." units="percent" equation="((IDVS_VBU_HIT) / ((IDVS_VBU_HIT) + (IDVS_VBU_MISS))) * 100" />
|
||||
<event name="MaliTilerVarCacheMiss" title="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" counter="IDVS_VBU_MISS" offset="35" />
|
||||
<event name="MaliTilerVarShadStallCy" title="Tiler varying shading stall cycles" description="The number of cycles when the tiler has a stalled varying shading request." units="cycles" counter="IDVS_VAR_SHAD_STALL" offset="38" />
|
||||
<event name="MaliTilerWrBt" title="Internal write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" counter="BUS_WRITE" offset="19" />
|
||||
</category>
|
||||
</metrics>
|
||||
303
src/panfrost/perf/generated/G720.xml
Normal file
303
src/panfrost/perf/generated/G720.xml
Normal file
|
|
@ -0,0 +1,303 @@
|
|||
<!--
|
||||
Copyright (c) 2026 Arm, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
Generated from libGPUCounters @ 4ea7c0127fe2942a00e4a1123bb62c625a401f93.
|
||||
https://github.com/ARM-software/libGPUCounters
|
||||
which is:
|
||||
Copyright (c) 2023-2025 Arm Limited
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
|
||||
<metrics id="G720">
|
||||
<category name="GPU Front-end">
|
||||
<event name="MaliBinningQueueActiveCy" title="Binning phase queue active cycles" description="The number of cycles that the binning phase queue is processing work. The binning phase includes position shading, culling, and binning." units="cycles" equation="(ITER_TILER_ACTIVE) - (ITER_TILER_READY_BLOCKED)" />
|
||||
<event name="MaliBinningQueueAssignStallCy" title="Binning phase queue endpoint stall cycles" description="The number of cycles the binning phase queue is waiting for endpoints to be assigned." units="cycles" counter="ITER_TILER_READY_BLOCKED" offset="22" />
|
||||
<event name="MaliBinningQueueIRQActiveCy" title="Binning phase queue interrupt pending cycles" description="The number of cycles that the binning phase queue IRQ was pending." units="cycles" counter="ITER_TILER_IRQ_ACTIVE" offset="20" />
|
||||
<event name="MaliBinningQueueJob" title="Binning phase jobs" description="The number of binning phase jobs processed." units="jobs" counter="ITER_TILER_JOB_COMPLETED" offset="17" />
|
||||
<event name="MaliBinningQueueTask" title="Binning phase tasks" description="The number of binning phase tasks processed." units="tasks" counter="ITER_TILER_IDVS_TASK_COMPLETED" offset="18" />
|
||||
<event name="MaliBinningQueueUtil" title="Binning phase queue utilization" description="The binning phase queue utilization compared against the GPU active cycles. The binning phase includes position shading, culling, and binning." units="percent" equation="(((ITER_TILER_ACTIVE) - (ITER_TILER_READY_BLOCKED)) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliBinningQueuedCy" title="Binning phase work queued cycles" description="The number of cycles that the binning phase queue has work queued. The binning phase includes position shading, culling, and binning." units="cycles" counter="ITER_TILER_ACTIVE" offset="16" />
|
||||
<event name="MaliCS0WaitStallCy" title="Command stream 0 wait stall cycles" description="The number of cycles that command stream interface 0 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF0_WAIT_BLOCKED" offset="51" />
|
||||
<event name="MaliCS1WaitStallCy" title="Command stream 1 wait stall cycles" description="The number of cycles that command stream interface 1 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF1_WAIT_BLOCKED" offset="55" />
|
||||
<event name="MaliCS2WaitStallCy" title="Command stream 2 wait stall cycles" description="The number of cycles that command stream interface 2 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF2_WAIT_BLOCKED" offset="59" />
|
||||
<event name="MaliCS3WaitStallCy" title="Command stream 3 wait stall cycles" description="The number of cycles that command stream interface 3 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF3_WAIT_BLOCKED" offset="63" />
|
||||
<event name="MaliCSFCEUActiveCy" title="Command execution unit active cycles" description="The number of cycles that the CEU is processing commands." units="cycles" counter="CEU_ACTIVE" offset="40" />
|
||||
<event name="MaliCSFCEUUtil" title="Command execution unit utilization" description="The CSF command execution unit utilization compared against the GPU active cycles." units="percent" equation="((CEU_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCSFCS0ActiveCy" title="Command stream 0 active cycles" description="The number of cycles that command stream interface 0 contained an enabled command stream." units="cycles" counter="CSHWIF0_ENABLED" offset="48" />
|
||||
<event name="MaliCSFCS1ActiveCy" title="Command stream 1 active cycles" description="The number of cycles that command stream interface 1 contained an enabled command stream." units="cycles" counter="CSHWIF1_ENABLED" offset="52" />
|
||||
<event name="MaliCSFCS2ActiveCy" title="Command stream 2 active cycles" description="The number of cycles that command stream interface 2 contained an enabled command stream." units="cycles" counter="CSHWIF2_ENABLED" offset="56" />
|
||||
<event name="MaliCSFCS3ActiveCy" title="Command stream 3 active cycles" description="The number of cycles that command stream interface 3 contained an enabled command stream." units="cycles" counter="CSHWIF3_ENABLED" offset="60" />
|
||||
<event name="MaliCSFLSUActiveCy" title="Command load/store unit active cycles" description="The number of cycles that the load-store unit is processing commands." units="cycles" counter="LSU_ACTIVE" offset="45" />
|
||||
<event name="MaliCSFLSUUtil" title="Command load/store unit utilization" description="The CSF load/store unit utilization compared against the GPU active cycles." units="percent" equation="((LSU_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCSFMCUActiveCy" title="MCU active cycles" description="The number of cycles when the CSF front-end MCU is actively processing." units="cycles" counter="MCU_ACTIVE" offset="5" />
|
||||
<event name="MaliCSFMCUUtil" title="Microcontroller utilization" description="The CSF MCU utilization compared against the GPU active cycles." units="percent" equation="((MCU_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCompQueueActiveCy" title="Compute queue active cycles" description="The number of cycles that the compute queue is processing work." units="cycles" equation="(ITER_COMP_ACTIVE) - (ITER_COMP_READY_BLOCKED)" />
|
||||
<event name="MaliCompQueueAssignStallCy" title="Compute queue endpoint stall cycles" description="The number of cycles the compute queue is waiting for endpoints to be assigned." units="cycles" counter="ITER_COMP_READY_BLOCKED" offset="30" />
|
||||
<event name="MaliCompQueueDrainStallCy" title="Compute queue endpoint drain stall cycles" description="The number of cycles the compute queue is waiting for endpoints to drain." units="cycles" counter="ITER_COMP_EP_DRAIN" offset="31" />
|
||||
<event name="MaliCompQueueIRQActiveCy" title="Compute queue interrupt pending cycles" description="The number of cycles that the compute queue IRQ was pending." units="cycles" counter="ITER_COMP_IRQ_ACTIVE" offset="28" />
|
||||
<event name="MaliCompQueueJob" title="Compute jobs" description="The number of compute jobs processed." units="jobs" counter="ITER_COMP_JOB_COMPLETED" offset="25" />
|
||||
<event name="MaliCompQueueTask" title="Compute tasks" description="The number of compute tasks processed." units="tasks" counter="ITER_COMP_TASK_COMPLETED" offset="26" />
|
||||
<event name="MaliCompQueueUtil" title="Compute queue utilization" description="The compute queue utilization compared against the GPU active cycles." units="percent" equation="(((ITER_COMP_ACTIVE) - (ITER_COMP_READY_BLOCKED)) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCompQueuedCy" title="Compute work queued cycles" description="The number of cycles that the compute queue has work queued." units="cycles" counter="ITER_COMP_ACTIVE" offset="24" />
|
||||
<event name="MaliGPUActiveCy" title="GPU active cycles" description="The number of cycles when the GPU has a workload of any type queued for processing." units="cycles" counter="GPU_ACTIVE" offset="4" />
|
||||
<event name="MaliGPUAnyQueueActiveCy" title="Any queue active cycles" description="The number of cycles when any GPU queue is active." units="cycles" counter="GPU_ITER_ACTIVE" offset="6" />
|
||||
<event name="MaliGPUIRQ" title="GPU interrupts" description="The number of interrupts raised by the GPU to the CPU." units="interrupts" counter="GPU_IRQ_COUNT" offset="11" />
|
||||
<event name="MaliGPUIRQActiveCy" title="GPU interrupt pending cycles" description="The number of cycles when the GPU has a pending interrupt." units="cycles" counter="IRQ_ACTIVE" offset="10" />
|
||||
<event name="MaliGPUIRQUtil" title="Interrupt pending utilization" description="The IRQ pending utilization compared against the GPU active cycles." units="percent" equation="((IRQ_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliMainQueueActiveCy" title="Main phase queue active cycles" description="The number of cycles that the main phase queue is processing work. The main phase includes deferred vertex shading and all fragment shading." units="cycles" equation="(ITER_FRAG_ACTIVE) - (ITER_FRAG_READY_BLOCKED)" />
|
||||
<event name="MaliMainQueueAssignStallCy" title="Main phase queue endpoint stall cycles" description="The number of cycles the main phase queue is waiting for endpoints to be assigned." units="cycles" counter="ITER_FRAG_READY_BLOCKED" offset="38" />
|
||||
<event name="MaliMainQueueIRQActiveCy" title="Main phase queue interrupt pending cycles" description="The number of cycles that the main phase queue IRQ was pending." units="cycles" counter="ITER_FRAG_IRQ_ACTIVE" offset="36" />
|
||||
<event name="MaliMainQueueJob" title="Main phase jobs" description="The number of main phase jobs processed." units="jobs" counter="ITER_FRAG_JOB_COMPLETED" offset="33" />
|
||||
<event name="MaliMainQueueTask" title="Main phase tasks" description="The number of main phase tasks processed." units="tasks" counter="ITER_FRAG_TASK_COMPLETED" offset="34" />
|
||||
<event name="MaliMainQueueUtil" title="Main phase queue utilization" description="The main phase queue utilization compared against the GPU active cycles. The main phase includes deferred vertex shading and all fragment shading." units="percent" equation="(((ITER_FRAG_ACTIVE) - (ITER_FRAG_READY_BLOCKED)) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliMainQueuedCy" title="Main phase work queued cycles" description="The number of cycles that the main phase queue has work queued. The main phase includes deferred vertex shading and all fragment shading." units="cycles" counter="ITER_FRAG_ACTIVE" offset="32" />
|
||||
<event name="MaliTilerQueueDrainStallCy" title="Binning phase queue endpoint drain stall cycles" description="The number of cycles the binning phase queue is waiting for endpoints to drain." units="cycles" counter="ITER_TILER_EP_DRAIN" offset="23" />
|
||||
</category>
|
||||
<category name="Memory System">
|
||||
<event name="MaliExtBusRd" title="Output external read transactions" description="The number of external read transactions." units="transactions" counter="L2_EXT_READ" offset="29" />
|
||||
<event name="MaliExtBusRdBt" title="Output external read beats" description="The number of external bus data read cycles." units="beats" counter="L2_EXT_READ_BEATS" offset="32" />
|
||||
<event name="MaliExtBusRdBy" title="Output external read bytes" description="The total output read bandwidth for the GPU." units="bytes" equation="(L2_EXT_READ_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusRdLat0" title="Output external read latency 0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" counter="L2_EXT_RRESP_0_127" offset="37" />
|
||||
<event name="MaliExtBusRdLat128" title="Output external read latency 128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" counter="L2_EXT_RRESP_128_191" offset="38" />
|
||||
<event name="MaliExtBusRdLat192" title="Output external read latency 192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" counter="L2_EXT_RRESP_192_255" offset="39" />
|
||||
<event name="MaliExtBusRdLat256" title="Output external read latency 256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" counter="L2_EXT_RRESP_256_319" offset="40" />
|
||||
<event name="MaliExtBusRdLat320" title="Output external read latency 320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" counter="L2_EXT_RRESP_320_383" offset="41" />
|
||||
<event name="MaliExtBusRdLat384" title="Output external read latency 384+ cycles" description="The number of read beats that are returned at least 384 cycles after the transaction started." units="beats" equation="(L2_EXT_READ_BEATS) - (L2_EXT_RRESP_0_127) - (L2_EXT_RRESP_128_191) - (L2_EXT_RRESP_192_255) - (L2_EXT_RRESP_256_319) - (L2_EXT_RRESP_320_383)" />
|
||||
<event name="MaliExtBusRdNoSnoop" title="Output external ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" counter="L2_EXT_READ_NOSNP" offset="30" />
|
||||
<event name="MaliExtBusRdOTQ1" title="Output external outstanding reads 0-25%" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q1" offset="34" />
|
||||
<event name="MaliExtBusRdOTQ2" title="Output external outstanding reads 25-50%" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q2" offset="35" />
|
||||
<event name="MaliExtBusRdOTQ3" title="Output external outstanding reads 50-75%" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q3" offset="36" />
|
||||
<event name="MaliExtBusRdOTQ4" title="Output external outstanding reads 75-100%" description="The number of read transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_READ) - (L2_EXT_AR_CNT_Q1) - (L2_EXT_AR_CNT_Q2) - (L2_EXT_AR_CNT_Q3)" />
|
||||
<event name="MaliExtBusRdStallCy" title="Output external read stall cycles" description="The number of cycles when a read is stalled waiting for the external bus." units="cycles" counter="L2_EXT_AR_STALL" offset="33" />
|
||||
<event name="MaliExtBusRdStallRate" title="Output external read stall percentage" description="The percentage of cycles with an external read transaction stalled." units="percent" equation="((L2_EXT_AR_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliExtBusRdUnique" title="Output external ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" counter="L2_EXT_READ_UNIQUE" offset="31" />
|
||||
<event name="MaliExtBusWr" title="Output external write transactions" description="The number of external write transactions." units="transactions" counter="L2_EXT_WRITE" offset="42" />
|
||||
<event name="MaliExtBusWrBt" title="Output external write beats" description="The number of external bus data write cycles." units="beats" counter="L2_EXT_WRITE_BEATS" offset="47" />
|
||||
<event name="MaliExtBusWrBy" title="Output external write bytes" description="The total output write bandwidth for the GPU." units="bytes" equation="(L2_EXT_WRITE_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusWrNoSnoopFull" title="Output external WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_FULL" offset="43" />
|
||||
<event name="MaliExtBusWrNoSnoopPart" title="Output external WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_PTL" offset="44" />
|
||||
<event name="MaliExtBusWrOTQ1" title="Output external outstanding writes 0-25%" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q1" offset="49" />
|
||||
<event name="MaliExtBusWrOTQ2" title="Output external outstanding writes 25-50%" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q2" offset="50" />
|
||||
<event name="MaliExtBusWrOTQ3" title="Output external outstanding writes 50-75%" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q3" offset="51" />
|
||||
<event name="MaliExtBusWrOTQ4" title="Output external outstanding writes 75-100%" description="The number of write transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_WRITE) - (L2_EXT_AW_CNT_Q1) - (L2_EXT_AW_CNT_Q2) - (L2_EXT_AW_CNT_Q3)" />
|
||||
<event name="MaliExtBusWrSnoopFull" title="Output external WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_FULL" offset="45" />
|
||||
<event name="MaliExtBusWrSnoopPart" title="Output external WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_PTL" offset="46" />
|
||||
<event name="MaliExtBusWrStallCy" title="Output external write stall cycles" description="The number of cycles when a write is stalled waiting for the external bus." units="cycles" counter="L2_EXT_W_STALL" offset="48" />
|
||||
<event name="MaliExtBusWrStallRate" title="Output external write stall percentage" description="The percentage of cycles with an external write transaction stalled." units="percent" equation="((L2_EXT_W_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliL2CacheCleanUnique" title="Input internal clean unique requests" description="The number of L2 cache line clean unique requests from internal requesters." units="requests" counter="L2_RD_MSG_IN_CU" offset="13" />
|
||||
<event name="MaliL2CacheEvict" title="Input internal evict requests" description="The number of L2 cache line evict requests from internal requesters." units="requests" counter="L2_RD_MSG_IN_EVICT" offset="12" />
|
||||
<event name="MaliL2CacheFlush" title="L2 cache flush requests" description="The number of GPU L2 cache flushes performed." units="requests" counter="CACHE_FLUSH" offset="13" />
|
||||
<event name="MaliL2CacheFlushCy" title="L2 cache flush cycles" description="The number of cycles spent flushing GPU L2 caches." units="cycles" counter="CACHE_FLUSH_CYCLES" offset="12" />
|
||||
<event name="MaliL2CacheIncSnp" title="Input external snoop transactions" description="The number of coherency snoops triggered by external requesters." units="transactions" counter="L2_EXT_SNOOP" offset="52" />
|
||||
<event name="MaliL2CacheIncSnpStallCy" title="Input external snoop stall cycles" description="The number of cycles when a coherency snoop triggered by an external requester is stalled." units="cycles" counter="L2_EXT_SNOOP_STALL" offset="53" />
|
||||
<event name="MaliL2CacheL1Rd" title="Output internal read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal requester." units="requests" counter="L2_RD_MSG_OUT" offset="22" />
|
||||
<event name="MaliL2CacheL1RdStallCy" title="Output internal read stall cycles" description="The number of cycles when L1 cache read requests sent by the L2 cache to an internal requester are stalled." units="cycles" counter="L2_RD_MSG_OUT_STALL" offset="23" />
|
||||
<event name="MaliL2CacheL1Wr" title="Output internal write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal requester." units="requests" counter="L2_WR_MSG_OUT" offset="24" />
|
||||
<event name="MaliL2CacheLookup" title="Any lookup requests" description="The number of L2 cache lookups performed." units="requests" counter="L2_ANY_LOOKUP" offset="25" />
|
||||
<event name="MaliL2CacheRd" title="Input internal read requests" description="The number of L2 cache read requests from internal requesters." units="requests" counter="L2_RD_MSG_IN" offset="16" />
|
||||
<event name="MaliL2CacheRdLookup" title="Read lookup requests" description="The number of L2 cache read lookups performed." units="requests" counter="L2_READ_LOOKUP" offset="26" />
|
||||
<event name="MaliL2CacheRdMissRate" title="L2 cache read miss percentage" description="The percentage of internal L2 cache reads that result in an external read." units="percent" equation="((L2_EXT_READ) / (L2_READ_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheRdStallCy" title="Input internal read stall cycles" description="The number of cycles when L2 cache read requests from internal requesters are stalled." units="cycles" counter="L2_RD_MSG_IN_STALL" offset="17" />
|
||||
<event name="MaliL2CacheSnp" title="Input internal snoop requests" description="The number of L2 snoop requests from internal requesters." units="requests" counter="L2_SNP_MSG_IN" offset="20" />
|
||||
<event name="MaliL2CacheSnpLookup" title="Input external snoop lookup requests" description="The number of coherency snoop lookups performed that were triggered by an external requester." units="requests" counter="L2_EXT_SNOOP_LOOKUP" offset="28" />
|
||||
<event name="MaliL2CacheSnpStallCy" title="Input internal snoop stall cycles" description="The number of cycles when L2 cache snoop requests from internal requesters are stalled." units="cycles" counter="L2_SNP_MSG_IN_STALL" offset="21" />
|
||||
<event name="MaliL2CacheWr" title="Input internal write requests" description="The number of L2 cache write requests from internal requesters." units="requests" counter="L2_WR_MSG_IN" offset="18" />
|
||||
<event name="MaliL2CacheWrLookup" title="Write lookup requests" description="The number of L2 cache write lookups performed." units="requests" counter="L2_WRITE_LOOKUP" offset="27" />
|
||||
<event name="MaliL2CacheWrMissRate" title="L2 cache write miss percentage" description="The percentage of internal L2 cache writes that result in an external write." units="percent" equation="((L2_EXT_WRITE) / (L2_WRITE_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheWrStallCy" title="Input internal write stall cycles" description="The number of cycles when L2 cache write requests from internal requesters are stalled." units="cycles" counter="L2_WR_MSG_IN_STALL" offset="19" />
|
||||
<event name="MaliMMUL2Hit" title="MMU L2 lookup TLB hits" description="The number of level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L2" offset="8" />
|
||||
<event name="MaliMMUL2Rd" title="MMU L2 table read requests" description="The number of level 2 translation table reads." units="requests" counter="MMU_TABLE_READS_L2" offset="6" />
|
||||
<event name="MaliMMUL3Hit" title="MMU L3 lookup TLB hits" description="The number of level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L3" offset="7" />
|
||||
<event name="MaliMMUL3Rd" title="MMU L3 table read requests" description="The number of level 3 translation table reads." units="requests" counter="MMU_TABLE_READS_L3" offset="5" />
|
||||
<event name="MaliMMULookup" title="MMU lookup requests" description="The number of main MMU address translations performed." units="requests" counter="MMU_REQUESTS" offset="4" />
|
||||
</category>
|
||||
<category name="Shader Core">
|
||||
<event name="MaliALUIssueCy" title="Arithmetic unit issue cycles" description="The number of cycles the arithmetic unit was busy." units="cycles" equation="max((EXEC_INSTR_CVT) + (EXEC_INSTR_SFU) + (((EXEC_INSTR_FMA) - min((EXEC_INSTR_FMA), (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU))) / 2), (EXEC_INSTR_SFU) * 4)" />
|
||||
<event name="MaliALUUtil" title="Arithmetic unit utilization" description="The percentage utilization of the arithmetic unit." units="percent" equation="((max((EXEC_INSTR_CVT) + (EXEC_INSTR_SFU) + (((EXEC_INSTR_FMA) - min((EXEC_INSTR_FMA), (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU))) / 2), (EXEC_INSTR_SFU) * 4)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliAnyActiveCy" title="Any workload active cycles" description="The number of cycles when the shader core is processing either a non-fragment workload or a fragment workload." units="cycles" counter="SHADER_CORE_ACTIVE" offset="53" />
|
||||
<event name="MaliAnyUtil" title="Shader core clock ratio" description="An estimate of shader core use relative to the GPU top-level clock." units="percent" equation="((SHADER_CORE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliAttrInstr" title="Attribute instructions" description="The number of instructions run by the attribute unit." units="instructions" counter="ATTR_INSTR" offset="52" />
|
||||
<event name="MaliCompOrBinningActiveCy" title="Compute or binning phase active cycles" description="The number of cycles when the shader core is processing some compute or binning phase workload." units="cycles" counter="COMPUTE_ACTIVE" offset="22" />
|
||||
<event name="MaliCompOrBinningUtil" title="Compute or binning phase utilization" description="The utilization of the shader core compute or binning phase path." units="percent" equation="((COMPUTE_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliCoreActiveCy" title="Execution core active cycles" description="The number of cycles when the shader core is processing at least one warp." units="cycles" counter="EXEC_CORE_ACTIVE" offset="26" />
|
||||
<event name="MaliCoreAllRegsWarp" title="Warps using more than 32 registers" description="The number of warps that require more than 32 registers." units="warps" counter="WARP_REG_SIZE_64" offset="17" />
|
||||
<event name="MaliCoreAllRegsWarpRate" title="All registers warp percentage" description="The percentage of warps that require more than 32 registers." units="percent" equation="((WARP_REG_SIZE_64) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreFragWarpOcc" title="Fragment warp occupancy" description="The thread occupancy of the fragment warps." units="percent" equation="((FRAG_SHADER_THREADS) / ((FRAG_WARPS) * (16))) * 100" />
|
||||
<event name="MaliCoreFullWarp" title="Full warps" description="The number of warps that have a full thread slot allocation." units="warps" counter="FULL_QUAD_WARPS" offset="21" />
|
||||
<event name="MaliCoreFullWarpRate" title="Full warp percentage" description="The percentage of warps that have a full thread slot allocation." units="percent" equation="((FULL_QUAD_WARPS) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreUtil" title="Execution core utilization" description="The utilization of the programmable shader core." units="percent" equation="((EXEC_CORE_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngArithInstr" title="Arithmetic instruction issue cycles" description="The total number of instructions issued to the FMA, CVT, and SFU pipes." units="instructions" equation="(EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU)" />
|
||||
<event name="MaliEngCVTInstr" title="Arithmetic CVT pipe instructions" description="The number of instructions issued to the CVT pipe." units="instructions" counter="EXEC_INSTR_CVT" offset="28" />
|
||||
<event name="MaliEngCVTPipeUtil" title="CVT pipe utilization" description="The utilization of the CVT pipe." units="percent" equation="((EXEC_INSTR_CVT) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngDivergedInstr" title="Diverged instructions" description="The number of instructions run per warp that have control flow divergence." units="instructions" counter="EXEC_INSTR_DIVERGED" offset="31" />
|
||||
<event name="MaliEngDivergedInstrRate" title="Warp divergence percentage" description="The percentage of instructions that have control flow divergence across the warp." units="percent" equation="((EXEC_INSTR_DIVERGED) / ((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU))) * 100" />
|
||||
<event name="MaliEngFMAInstr" title="Arithmetic FMA pipe instructions" description="The number of instructions issued to the FMA pipe." units="instructions" counter="EXEC_INSTR_FMA" offset="27" />
|
||||
<event name="MaliEngFMAPipeUtil" title="FMA pipe utilization" description="The utilization of the FMA pipes." units="percent" equation="((EXEC_INSTR_FMA) / (2 * (EXEC_CORE_ACTIVE))) * 100" />
|
||||
<event name="MaliEngICacheMiss" title="Instruction cache misses" description="The number of instruction cache misses." units="requests" counter="EXEC_ICACHE_MISS" offset="32" />
|
||||
<event name="MaliEngNarrowInstr" title="Narrow arithmetic instructions" description="The number of narrow arithmetic instructions." units="instructions" counter="EXEC_INSTR_NARROW" offset="5" />
|
||||
<event name="MaliEngNarrowInstrRate" title="Narrow arithmetic percentage" description="The percentage of arithmetic instructions that operate on 8/16-bit types." units="percent" equation="((EXEC_INSTR_NARROW) / ((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU))) * 100" />
|
||||
<event name="MaliEngSFUInstr" title="Arithmetic SFU pipe instructions" description="The number of instructions issued to the SFU pipe." units="instructions" counter="EXEC_INSTR_SFU" offset="29" />
|
||||
<event name="MaliEngSFUPipeUtil" title="SFU pipe utilization" description="The utilization of the SFU pipe." units="percent" equation="(((EXEC_INSTR_SFU) * 4) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngSWBlendInstr" title="Blend shader instructions" description="The number of blend shader invocations run." units="instructions" counter="CALL_BLEND_SHADER" offset="34" />
|
||||
<event name="MaliEngSWBlendRate" title="Shader blend percentage" description="The percentage of fragments that use shader-based blending." units="percent" equation="(((CALL_BLEND_SHADER) * 4) / (FRAG_WARPS)) * 100" />
|
||||
<event name="MaliEngStarveCy" title="Processing unit starvation cycles" description="The number of cycles when the processing unit is starved of work." units="cycles" counter="EXEC_STARVE_ARITH" offset="33" />
|
||||
<event name="MaliFragEZSKillQd" title="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_KILL" offset="14" />
|
||||
<event name="MaliFragEZSKillRate" title="Early ZS killed quad percentage" description="The percentage of rasterized quads that are killed by early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSTestQd" title="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_TEST" offset="12" />
|
||||
<event name="MaliFragEZSTestRate" title="Early ZS tested quad percentage" description="The percentage of rasterized quads that were subjected to early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSUpdateQd" title="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" counter="FRAG_QUADS_EZS_UPDATE" offset="13" />
|
||||
<event name="MaliFragEZSUpdateRate" title="Early ZS updated quad percentage" description="The percentage of rasterized quads that update the framebuffer during early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_UPDATE) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragFPKActiveCy" title="Fragment pre-pipe buffer active cycles" description="The number of cycles when at least one quad is present in the pre-pipe quad queue." units="cycles" counter="FRAG_FPK_ACTIVE" offset="7" />
|
||||
<event name="MaliFragFPKBUtil" title="Fragment pre-pipe buffer utilization" description="The percentage of cycles when at least one quad is buffered for fragment shading." units="percent" equation="((FRAG_FPK_ACTIVE) / (FRAG_ACTIVE)) * 100" />
|
||||
<event name="MaliFragFPKKillQd" title="FPK HSR killed quads" description="The number of quads that are killed by hidden surface removal." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (((FRAG_WARPS) * (16)) / 4)" />
|
||||
<event name="MaliFragFPKKillRate" title="FPK HSR killed quad percentage" description="The percentage of rasterized quads that are killed by hidden surface removal." units="percent" equation="(((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (((FRAG_WARPS) * (16)) / 4)) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSKillQd" title="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" counter="FRAG_LZS_KILL" offset="16" />
|
||||
<event name="MaliFragLZSKillRate" title="Late ZS killed quad percentage" description="The percentage of rasterized quads that are killed by late depth and stencil testing." units="percent" equation="((FRAG_LZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSTestQd" title="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" counter="FRAG_LZS_TEST" offset="15" />
|
||||
<event name="MaliFragLZSTestRate" title="Late ZS tested quad percentage" description="The percentage of rasterized quads that are tested by late depth and stencil testing." units="percent" equation="((FRAG_LZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragOpaqueQd" title="Occluding quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" counter="QUAD_FPK_KILLER" offset="20" />
|
||||
<event name="MaliFragOpaqueQdRate" title="Occluding quad percentage" description="The percentage of quads that are valid occluders for hidden surface removal." units="percent" equation="((QUAD_FPK_KILLER) / ((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL))) * 100" />
|
||||
<event name="MaliFragOverdraw" title="Fragments per pixel" description="The number of fragments shaded per output pixel." units="threads" equation="(FRAG_SHADER_THREADS) / ((ITER_FRAG_TASK_COMPLETED) * (64) * (64))" />
|
||||
<event name="MaliFragRastCoarseQd" title="Rasterized coarse quads" description="The number of coarse quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_COARSE" offset="68" />
|
||||
<event name="MaliFragRastPartQd" title="Partial rasterized fine quads" description="The number of rasterized fine quads created with partial coverage." units="quads" counter="FRAG_PARTIAL_QUADS_RAST" offset="10" />
|
||||
<event name="MaliFragRastPartQdRate" title="Partial coverage percentage" description="The percentage of rasterized fine quads that contain samples with no coverage." units="percent" equation="((FRAG_PARTIAL_QUADS_RAST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragRastPrim" title="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" counter="FRAG_PRIM_RAST" offset="6" />
|
||||
<event name="MaliFragRastQd" title="Rasterized fine quads" description="The number of fine quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_RAST" offset="11" />
|
||||
<event name="MaliFragShadRate" title="Fragment shading rate" description="The percentage of coarse quads generated relative to fine quads rasterized." units="percent" equation="((FRAG_QUADS_COARSE) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragShadedQd" title="Shaded coarse quads" description="The number of 2x2 fragment quads that are fragment shaded." units="quads" equation="((FRAG_WARPS) * (16)) / 4" />
|
||||
<event name="MaliFragThread" title="Fragment threads" description="The number of fragment threads started." units="threads" counter="FRAG_SHADER_THREADS" offset="69" />
|
||||
<event name="MaliFragThroughputCy" title="Average cycles per fragment thread" description="The average number of shader core cycles per fragment thread." units="cycles" equation="(FRAG_ACTIVE) / (FRAG_SHADER_THREADS)" />
|
||||
<event name="MaliFragTile" title="Tiles" description="The number of tiles processed by the shader core." units="tiles" counter="FRAG_PTILES" offset="18" />
|
||||
<event name="MaliFragTileKill" title="Killed unchanged tiles" description="The number of tiles killed by transaction elimination." units="tiles" counter="FRAG_TRANS_ELIM" offset="19" />
|
||||
<event name="MaliFragTileKillRate" title="Unchanged tile kill percentage" description="The percentage of tiles that are killed by transaction elimination." units="percent" equation="((FRAG_TRANS_ELIM) / (4 * (FRAG_PTILES))) * 100" />
|
||||
<event name="MaliFragTransparentQd" title="Non-occluding quads" description="The number of quads that are not eligible to be a hidden surface removal occluder." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (QUAD_FPK_KILLER)" />
|
||||
<event name="MaliFragWarp" title="Fragment warps" description="The number of fragment warps created." units="warps" counter="FRAG_WARPS" offset="9" />
|
||||
<event name="MaliLSAtomic" title="Load/store unit atomic issues" description="The number of load/store atomic accesses." units="cycles" counter="LS_MEM_ATOMIC" offset="48" />
|
||||
<event name="MaliLSFullRd" title="Load/store unit full read issues" description="The number of full-width load/store cache reads." units="cycles" counter="LS_MEM_READ_FULL" offset="44" />
|
||||
<event name="MaliLSFullWr" title="Load/store unit full write issues" description="The number of full-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_FULL" offset="46" />
|
||||
<event name="MaliLSIssueCy" title="Load/store unit issue cycles" description="The total number of load/store issue cycles." units="cycles" equation="((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)" />
|
||||
<event name="MaliLSPartRd" title="Load/store unit partial read issues" description="The number of partial-width load/store cache reads." units="cycles" counter="LS_MEM_READ_SHORT" offset="45" />
|
||||
<event name="MaliLSPartWr" title="Load/store unit partial write issues" description="The number of partial-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_SHORT" offset="47" />
|
||||
<event name="MaliLSRdCy" title="Load/store unit read issues" description="The total number of load/store read cycles." units="cycles" equation="(LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)" />
|
||||
<event name="MaliLSUtil" title="Load/store unit utilization" description="The percentage utilization of the load/store unit." units="percent" equation="((((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliLSWrCy" title="Load/store unit write issues" description="The total number of load/store write cycles." units="cycles" equation="(LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)" />
|
||||
<event name="MaliMainActiveCy" title="Main phase active cycles" description="The number of cycles when the shader core is processing a main phase workload." units="cycles" counter="FRAG_ACTIVE" offset="4" />
|
||||
<event name="MaliMainUtil" title="Main phase utilization" description="The utilization of the shader core main phase path." units="percent" equation="((FRAG_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragTask" title="Non-main phase core tasks" description="The number of non-main phase tasks issued to the shader core." units="tasks" counter="COMPUTE_TASKS" offset="23" />
|
||||
<event name="MaliNonFragThread" title="Non-fragment threads" description="The number of non-fragment threads started." units="threads" equation="(COMPUTE_WARPS) * (16)" />
|
||||
<event name="MaliNonFragThroughputCy" title="Average cycles per non-fragment thread" description="The average number of shader core cycles per non-fragment thread." units="cycles" equation="(COMPUTE_ACTIVE) / ((COMPUTE_WARPS) * (16))" />
|
||||
<event name="MaliNonFragWarp" title="Non-fragment warps" description="The number of non-fragment warps created." units="warps" counter="COMPUTE_WARPS" offset="24" />
|
||||
<event name="MaliRTUBox" title="Ray tracing box tests" description="The number of acceleration structure bounding boxes tested." units="boxes" counter="RT_RAY_BOX" offset="71" />
|
||||
<event name="MaliRTUBoxBin1" title="Ray tracing box nodes with 1-4 rays" description="The number of acceleration structure box nodes with 1 to 4 active rays in the warp." units="nodes" counter="RT_RAY_BOX_BIN_1_4" offset="76" />
|
||||
<event name="MaliRTUBoxBin13" title="Ray tracing box nodes with 13-16 rays" description="The number of acceleration structure box nodes with 13 to 16 active rays in the warp." units="nodes" counter="RT_RAY_BOX_BIN_13_16" offset="79" />
|
||||
<event name="MaliRTUBoxBin5" title="Ray tracing box nodes with 5-8 rays" description="The number of acceleration structure box nodes with 5 to 8 active rays in the warp." units="nodes" counter="RT_RAY_BOX_BIN_5_8" offset="77" />
|
||||
<event name="MaliRTUBoxBin9" title="Ray tracing box nodes with 9-12 rays" description="The number of acceleration structure box nodes with 9 to 12 active rays in the warp." units="nodes" counter="RT_RAY_BOX_BIN_9_12" offset="78" />
|
||||
<event name="MaliRTUBoxIssueCy" title="Ray tracing box tester issue cycles" description="The number of active issue cycles for the ray tracing box test unit." units="cycles" counter="RT_RAY_BOX_ISSUED" offset="85" />
|
||||
<event name="MaliRTUFirstHitTerm" title="Ray tracing first hit terminations" description="The number of rays that terminate on their first hit." units="rays" counter="RT_TERM_FIRST_HIT" offset="82" />
|
||||
<event name="MaliRTUIssueCy" title="Ray tracing unit issue cycles" description="The number of cycles the ray tracing unit was issuing work." units="cycles" equation="max((RT_RAY_BOX_ISSUED), (RT_RAY_TRI_ISSUED))" />
|
||||
<event name="MaliRTUMiss" title="Ray tracing triangle test misses" description="The number of triangle intersection tests that do not intersect." units="rays" counter="RT_MISS" offset="83" />
|
||||
<event name="MaliRTUNonOpaqueHit" title="Ray tracing non-opaque triangle hits" description="The number of non-opaque triangle hits." units="tests" counter="RT_NON_OPAQUE_HIT" offset="81" />
|
||||
<event name="MaliRTUOpaqueHit" title="Ray tracing opaque triangle hits" description="The number of opaque triangle hits." units="tests" counter="RT_OPAQUE_HIT" offset="80" />
|
||||
<event name="MaliRTURay" title="Ray tracing started rays" description="The number of rays started." units="rays" counter="RT_RAYS_STARTED" offset="84" />
|
||||
<event name="MaliRTUTri" title="Ray tracing triangle nodes tested" description="The number of triangle nodes tested." units="nodes" counter="RT_RAY_TRI" offset="70" />
|
||||
<event name="MaliRTUTriBin1" title="Ray tracing triangle nodes with 1-4 rays" description="The number of triangle nodes with 1 to 4 active rays in the warp." units="nodes" counter="RT_RAY_TRI_BIN_1_4" offset="72" />
|
||||
<event name="MaliRTUTriBin13" title="Ray tracing triangle nodes with 13-16 rays" description="The number of triangle nodes with 13 to 16 active rays in the warp." units="nodes" counter="RT_RAY_TRI_BIN_13_16" offset="75" />
|
||||
<event name="MaliRTUTriBin5" title="Ray tracing triangle nodes with 5-8 rays" description="The number of triangle nodes with 5 to 8 active rays in the warp." units="nodes" counter="RT_RAY_TRI_BIN_5_8" offset="73" />
|
||||
<event name="MaliRTUTriBin9" title="Ray tracing triangle nodes with 9-12 rays" description="The number of triangle nodes with 9 to 12 active rays in the warp." units="nodes" counter="RT_RAY_TRI_BIN_9_12" offset="74" />
|
||||
<event name="MaliRTUTriIssueCy" title="Ray tracing triangle tester issue cycles" description="The number of active issue cycles for the ray tracing triangle test unit." units="cycles" counter="RT_RAY_TRI_ISSUED" offset="86" />
|
||||
<event name="MaliRTUUtil" title="Ray tracing unit utilization" description="The percentage utilization of the ray tracing unit." units="percent" equation="((max((RT_RAY_BOX_ISSUED), (RT_RAY_TRI_ISSUED))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliSCBusFFEExtRdBt" title="Fragment front-end read beats from external memory" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_FTC_EXT" offset="55" />
|
||||
<event name="MaliSCBusFFEExtRdBy" title="Fragment front-end read bytes from external memory" description="The total number of bytes read from the external memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC_EXT) * (16)" />
|
||||
<event name="MaliSCBusFFEL2RdBt" title="Fragment front-end read beats from L2 cache" description="The number of read beats received by the fixed-function fragment front-end." units="beats" counter="BEATS_RD_FTC" offset="54" />
|
||||
<event name="MaliSCBusFFEL2RdBy" title="Fragment front-end read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdBt" title="Load/store unit read beats from external memory" description="The number of read beats received by the load/store unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_LSC_EXT" offset="57" />
|
||||
<event name="MaliSCBusLSExtRdBy" title="Load/store unit read bytes from external memory" description="The total number of bytes read from the external memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC_EXT) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdByPerRd" title="Load/store unit bytes read from external memory per access cycle" description="The average number of bytes read from the external memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC_EXT) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSL2RdBt" title="Load/store unit read beats from L2 cache" description="The number of read beats received by the load/store unit." units="beats" counter="BEATS_RD_LSC" offset="56" />
|
||||
<event name="MaliSCBusLSL2RdBy" title="Load/store unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSL2RdByPerRd" title="Load/store unit bytes read from L2 per access cycle" description="The average number of bytes read from the L2 memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSOtherWrBt" title="Load/store unit other write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of any reason other than write-back." units="beats" counter="BEATS_WR_LSC_OTHER" offset="61" />
|
||||
<event name="MaliSCBusLSWBWrBt" title="Load/store unit write-back write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of write-back." units="beats" counter="BEATS_WR_LSC_WB" offset="63" />
|
||||
<event name="MaliSCBusLSWrBt" title="Load/store unit write beats to L2 memory system" description="The number of write beats sent by the load/store unit." units="beats" equation="(BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)" />
|
||||
<event name="MaliSCBusLSWrBy" title="Load/store unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the load/store unit." units="bytes" equation="((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)" />
|
||||
<event name="MaliSCBusLSWrByPerWr" title="Load/store unit bytes written to L2 per access cycle" description="The average number of bytes written to the L2 memory system by the load/store unit per write cycle." units="bytes" equation="(((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)) / ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT))" />
|
||||
<event name="MaliSCBusOtherL2RdBt" title="Miscellaneous read beats from L2 cache" description="The number of read beats received by a unit that is not specifically identified." units="beats" counter="BEATS_RD_OTHER" offset="60" />
|
||||
<event name="MaliSCBusTexExtRdBt" title="Texture unit read beats from external memory" description="The number of read beats received by the texture unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_TEX_EXT" offset="59" />
|
||||
<event name="MaliSCBusTexExtRdBy" title="Texture unit read bytes from external memory" description="The total number of bytes read from the external memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX_EXT) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdByPerRd" title="Texture unit bytes read from external memory per texture cycle" description="The average number of bytes read from the external memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX_EXT) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTexL2RdBt" title="Texture unit read beats from L2 cache" description="The number of read beats received by the texture unit." units="beats" counter="BEATS_RD_TEX" offset="58" />
|
||||
<event name="MaliSCBusTexL2RdBy" title="Texture unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX) * (16)" />
|
||||
<event name="MaliSCBusTexL2RdByPerRd" title="Texture unit bytes read from L2 per texture cycle" description="The average number of bytes read from the L2 memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTileWrBPerPx" title="Tile unit bytes written to L2 per pixel" description="The average number of bytes written to the L2 memory system by the tile unit per output pixel." units="bytes" equation="((BEATS_WR_TIB) * (16)) / ((ITER_FRAG_TASK_COMPLETED) * (64) * (64))" />
|
||||
<event name="MaliSCBusTileWrBt" title="Tile unit write beats to L2 memory system" description="The number of write beats sent by the tile write-back unit." units="beats" counter="BEATS_WR_TIB" offset="62" />
|
||||
<event name="MaliSCBusTileWrBy" title="Tile unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the tile write-back unit." units="bytes" equation="(BEATS_WR_TIB) * (16)" />
|
||||
<event name="MaliTexCPI" title="Texture filtering cycles per instruction" description="The average number of texture filtering cycles per instruction." units="cycles" equation="(max((TEX_FILT_NUM_OPERATIONS), (TEX_TFCH_NUM_TCL_OPERATIONS), (TEX_CFCH_NUM_DIRECT_PATH_OPERATIONS), (TEX_CFCH_NUM_RP_OPERATIONS), (TEX_MSGI_NUM_FLITS), (TEX_MSGO_NUM_FLITS), (TEX_CFCH_NUM_L1_CL_OPERATIONS), (TEX_CFCH_NUM_L1_CT_OPERATIONS), (TEX_TIDX_NUM_OPERATIONS))) / ((((TEX_MSGO_NUM_MSG) * 2) - (TEX_MSGO_NUM_SINGLE_QUAD_MSG)) * 4)" />
|
||||
<event name="MaliTexCacheComplexLoadCy" title="Complex texture load cycles" description="The number of cycles loading complex texture formats." units="cycles" counter="TEX_CFCH_NUM_RP_OPERATIONS" offset="93" />
|
||||
<event name="MaliTexCacheLookupCy" title="Texture cache lookup cycles" description="The number of cycles returning data from the texture cache." units="cycles" counter="TEX_TFCH_NUM_TCL_OPERATIONS" offset="92" />
|
||||
<event name="MaliTexCacheSimpleLoadCy" title="Simple texture load cycles" description="The number of cycles loading simple texture formats." units="cycles" counter="TEX_CFCH_NUM_DIRECT_PATH_OPERATIONS" offset="88" />
|
||||
<event name="MaliTexClkActiveCy" title="Texture unit clock active cycles" description="The number of cycles the texture unit was active." units="cycles" counter="TEX_TEXP_CLK_ACTIVE" offset="96" />
|
||||
<event name="MaliTexClkStarvedCy" title="Texture causing starvation cycles" description="The number of cycles the texture unit was active but did not return a texture sample." units="cycles" counter="TEX_MSGI_CLK_STARVED" offset="95" />
|
||||
<event name="MaliTexDataFetchStallCy" title="Texture fetch stall cycles" description="The number of cycles when a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles" counter="TEX_TFCH_CLK_STALLED" offset="37" />
|
||||
<event name="MaliTexDescStallCy" title="Texture descriptor stall cycles" description="The number of cycles when a quad is stalled on texture descriptor fetch." units="cycles" counter="TEX_DFCH_CLK_STALLED" offset="36" />
|
||||
<event name="MaliTexFiltIssueCy" title="Texture filtering cycles" description="The number of texture filtering issue cycles." units="cycles" counter="TEX_FILT_NUM_OPERATIONS" offset="39" />
|
||||
<event name="MaliTexFiltStallCy" title="Texture filtering stall cycles" description="The number of cycles when the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" offset="38" />
|
||||
<event name="MaliTexInBt" title="Texture message read beats" description="The number of texture request message data beats." units="beats" counter="TEX_MSGI_NUM_FLITS" offset="35" />
|
||||
<event name="MaliTexInBusUtil" title="Texture input bus utilization" description="The percentage load on the texture message input bus." units="percent" equation="((TEX_MSGI_NUM_FLITS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexIndexCy" title="Texture index calculation cycles" description="The number of cycles computing texel index values." units="cycles" counter="TEX_TIDX_NUM_OPERATIONS" offset="94" />
|
||||
<event name="MaliTexIssueCy" title="Texture unit issue cycles" description="The number of cycles the texture unit was busy." units="cycles" equation="max((TEX_FILT_NUM_OPERATIONS), (TEX_TFCH_NUM_TCL_OPERATIONS), (TEX_CFCH_NUM_DIRECT_PATH_OPERATIONS), (TEX_CFCH_NUM_RP_OPERATIONS), (TEX_MSGI_NUM_FLITS), (TEX_MSGO_NUM_FLITS), (TEX_CFCH_NUM_L1_CL_OPERATIONS), (TEX_CFCH_NUM_L1_CT_OPERATIONS), (TEX_TIDX_NUM_OPERATIONS))" />
|
||||
<event name="MaliTexL1CacheLoadCy" title="L1 texture cache load cycles" description="The number of cycles the L1 cache is being loaded." units="cycles" counter="TEX_CFCH_NUM_OUTPUT_OPERATIONS" offset="87" />
|
||||
<event name="MaliTexL1CacheLookupCy" title="L1 texture cache lookup cycles" description="The number of cycles the L1 cache is being accessed." units="cycles" counter="TEX_CFCH_NUM_L1_CT_OPERATIONS" offset="90" />
|
||||
<event name="MaliTexL1CacheOutputCy" title="L1 texture cache output cycles" description="The number of cycles the L1 cache is returning data." units="cycles" counter="TEX_CFCH_NUM_L1_CL_OPERATIONS" offset="89" />
|
||||
<event name="MaliTexOutBt" title="Texture message write beats" description="The number of texture response message data beats." units="beats" counter="TEX_MSGO_NUM_FLITS" offset="43" />
|
||||
<event name="MaliTexOutBusUtil" title="Texture output bus utilization" description="The percentage load on the texture message output bus." units="percent" equation="((TEX_MSGO_NUM_FLITS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexOutMsg" title="Texture messages" description="The number of output texture messages from the texture unit." units="issues" counter="TEX_MSGO_NUM_MSG" offset="42" />
|
||||
<event name="MaliTexOutSingleMsg" title="Texture messages with single quad" description="The number of single quad output texture messages from the texture unit." units="issues" counter="TEX_MSGO_NUM_SINGLE_QUAD_MSG" offset="91" />
|
||||
<event name="MaliTexQuads" title="Texture quads" description="The number of quad-width texture operations processed by the texture unit." units="quads" equation="((TEX_MSGO_NUM_MSG) * 2) - (TEX_MSGO_NUM_SINGLE_QUAD_MSG)" />
|
||||
<event name="MaliTexSample" title="Texture samples" description="The number of texture samples made." units="requests" equation="(((TEX_MSGO_NUM_MSG) * 2) - (TEX_MSGO_NUM_SINGLE_QUAD_MSG)) * 4" />
|
||||
<event name="MaliTexUtil" title="Texture unit utilization" description="The percentage utilization of the texturing unit." units="percent" equation="((max((TEX_FILT_NUM_OPERATIONS), (TEX_TFCH_NUM_TCL_OPERATIONS), (TEX_CFCH_NUM_DIRECT_PATH_OPERATIONS), (TEX_CFCH_NUM_RP_OPERATIONS), (TEX_MSGI_NUM_FLITS), (TEX_MSGO_NUM_FLITS), (TEX_CFCH_NUM_L1_CL_OPERATIONS), (TEX_CFCH_NUM_L1_CT_OPERATIONS), (TEX_TIDX_NUM_OPERATIONS))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliVar16IssueCy" title="16-bit interpolation issue cycles" description="The number of 16-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_16) / (4)" />
|
||||
<event name="MaliVar16IssueSlot" title="16-bit interpolation slots" description="The number of 16-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_16" offset="51" />
|
||||
<event name="MaliVar32IssueCy" title="32-bit interpolation issue cycles" description="The number of 32-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_32) / (4)" />
|
||||
<event name="MaliVar32IssueSlot" title="32-bit interpolation slots" description="The number of 32-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_32" offset="50" />
|
||||
<event name="MaliVarInstr" title="Varying unit instructions" description="The number of warp-width interpolation operations processed by the varying unit." units="requests" counter="VARY_INSTR" offset="49" />
|
||||
<event name="MaliVarIssueCy" title="Varying unit issue cycles" description="The total number of varying unit issue cycles." units="cycles" equation="((VARY_SLOT_32) / (4)) + ((VARY_SLOT_16) / (4))" />
|
||||
<event name="MaliVarUtil" title="Varying unit utilization" description="The percentage utilization of the varying unit." units="percent" equation="((((VARY_SLOT_32) / (4)) + ((VARY_SLOT_16) / (4))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
</category>
|
||||
<category name="Tiler">
|
||||
<event name="MaliGeomBackFacePrim" title="Visible back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" counter="BACK_FACING" offset="10" />
|
||||
<event name="MaliGeomFaceCullPrim" title="Facing test culled primitives" description="The number of primitives that are culled by facing tests." units="primitives" counter="PRIM_FACE_CULLED" offset="12" />
|
||||
<event name="MaliGeomFaceCullRate" title="Facing plane test cull percentage" description="The percentage of primitives culled by the facing test." units="percent" equation="((PRIM_FACE_CULLED) / ((((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE)) - (PRIM_FRUSTUM_CULLED) - (PRIM_SCISSOR_CULLED))) * 100" />
|
||||
<event name="MaliGeomFrontFacePrim" title="Visible front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" counter="FRONT_FACING" offset="9" />
|
||||
<event name="MaliGeomLinePrim" title="Line primitives" description="The number of input line primitives." units="primitives" counter="LINES" offset="7" />
|
||||
<event name="MaliGeomPlaneCullPrim" title="Frustum test culled primitives" description="The number of primitives that are culled by frustum tests." units="primitives" counter="PRIM_FRUSTUM_CULLED" offset="13" />
|
||||
<event name="MaliGeomPlaneCullRate" title="Frustum test cull percentage" description="The percentage of primitives culled by the frustum test." units="percent" equation="((PRIM_FRUSTUM_CULLED) / (((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomPointPrim" title="Point primitives" description="The number of input point primitives." units="primitives" counter="POINTS" offset="8" />
|
||||
<event name="MaliGeomPosShadTask" title="Tiler position shading requests" description="The number of position shading requests in the tiler geometry flow." units="requests" counter="IDVS_POS_SHAD_REQ" offset="21" />
|
||||
<event name="MaliGeomPosShadThread" title="Position shader thread invocations" description="The number of position shader thread invocations." units="threads" equation="(IDVS_POS_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomPosShadThreadPerPrim" title="Position threads per input primitive" description="The number of position shader invocations per input primitive." units="threads" equation="((IDVS_POS_SHAD_REQ) * (4)) / (((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE))" />
|
||||
<event name="MaliGeomSampleCullPrim" title="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" counter="PRIM_SAT_CULLED" offset="14" />
|
||||
<event name="MaliGeomSampleCullRate" title="Sample test cull percentage" description="The percentage of primitives culled by the sample coverage test." units="percent" equation="((PRIM_SAT_CULLED) / ((((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE)) - (PRIM_FRUSTUM_CULLED) - (PRIM_SCISSOR_CULLED) - (PRIM_FACE_CULLED))) * 100" />
|
||||
<event name="MaliGeomScissorCullPrim" title="Scissor test culled primitives" description="The number of primitives that are culled by the scissor test." units="primitives" counter="PRIM_SCISSOR_CULLED" offset="70" />
|
||||
<event name="MaliGeomScissorCullRate" title="Scissor test cull percentage" description="The percentage of primitives culled by the scissor test." units="percent" equation="((PRIM_SCISSOR_CULLED) / ((((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE)) - (PRIM_FRUSTUM_CULLED))) * 100" />
|
||||
<event name="MaliGeomTotalCullPrim" title="Culled primitives" description="The number of primitives that were culled during the rendering process." units="primitives" equation="(PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)" />
|
||||
<event name="MaliGeomTotalPrim" title="Total input primitives" description="The total number of input primitives to the rendering process." units="primitives" equation="((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomTrianglePrim" title="Triangle primitives" description="The number of input triangle primitives." units="primitives" counter="TRIANGLES" offset="6" />
|
||||
<event name="MaliGeomVarShadTask" title="Tiler varying shading requests" description="The number of varying shading requests in the tiler geometry flow." units="requests" counter="IDVS_VAR_SHAD_REQ" offset="37" />
|
||||
<event name="MaliGeomVarShadThread" title="Varying shader thread invocations" description="The number of varying shader thread invocations." units="threads" equation="(IDVS_VAR_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomVisibleDVSPrim" title="Visible primitives using DVS" description="The number of primitives using DVS that are visible after culling." units="primitives" counter="PRIM_VISIBLE_DVS" offset="71" />
|
||||
<event name="MaliGeomVisiblePrim" title="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" counter="PRIM_VISIBLE" offset="11" />
|
||||
<event name="MaliGeomVisibleRate" title="Visible primitive percentage" description="The percentage of primitives that are visible after culling." units="percent" equation="((PRIM_VISIBLE) / (((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliTilerActiveCy" title="Tiler active cycles" description="The number of cycles when the tiler has a workload queued for processing." units="cycles" counter="TILER_ACTIVE" offset="4" />
|
||||
<event name="MaliTilerPosCacheHit" title="Position cache hit requests" description="The number of position lookups that result in a hit in the vertex cache." units="requests" counter="VCACHE_HIT" offset="26" />
|
||||
<event name="MaliTilerPosShadFIFOFullCy" title="Tiler position FIFO full cycles" description="The number of cycles when the tiler has a stalled position shading buffer." units="cycles" counter="IDVS_POS_FIFO_FULL" offset="24" />
|
||||
<event name="MaliTilerPosShadStallCy" title="Tiler position shading stall cycles" description="The number of cycles when the tiler has a stalled position shading request." units="cycles" counter="IDVS_POS_SHAD_STALL" offset="23" />
|
||||
<event name="MaliTilerPrimAsPosShadStallCy" title="Primitive assembly position shading stall cycles" description="The number of cycles when primitive assembly is waiting for position shading." units="cycles" counter="PRIMASSY_POS_SHADER_WAIT" offset="64" />
|
||||
<event name="MaliTilerRdBt" title="Output internal read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" counter="BUS_READ" offset="17" />
|
||||
<event name="MaliTilerUtil" title="Tiler utilization" description="The percentage of GPU active cycles when the tiler has a workload queued for processing." units="percent" equation="((TILER_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliTilerVarCacheHit" title="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" counter="IDVS_VBU_HIT" offset="34" />
|
||||
<event name="MaliTilerVarCacheHitRate" title="Varying cache hit percentage" description="The percentage hit rate of the tiler varying cache." units="percent" equation="((IDVS_VBU_HIT) / ((IDVS_VBU_HIT) + (IDVS_VBU_MISS))) * 100" />
|
||||
<event name="MaliTilerVarCacheMiss" title="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" counter="IDVS_VBU_MISS" offset="35" />
|
||||
<event name="MaliTilerVarShadStallCy" title="Tiler varying shading stall cycles" description="The number of cycles when the tiler has a stalled varying shading request." units="cycles" counter="IDVS_VAR_SHAD_STALL" offset="38" />
|
||||
</category>
|
||||
</metrics>
|
||||
335
src/panfrost/perf/generated/G725.xml
Normal file
335
src/panfrost/perf/generated/G725.xml
Normal file
|
|
@ -0,0 +1,335 @@
|
|||
<!--
|
||||
Copyright (c) 2026 Arm, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
Generated from libGPUCounters @ 4ea7c0127fe2942a00e4a1123bb62c625a401f93.
|
||||
https://github.com/ARM-software/libGPUCounters
|
||||
which is:
|
||||
Copyright (c) 2023-2025 Arm Limited
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
|
||||
<metrics id="G725">
|
||||
<category name="GPU Front-end">
|
||||
<event name="MaliBinningQueueActiveCy" title="Binning phase queue active cycles" description="The number of cycles that the binning phase queue is processing work. The binning phase includes position shading, culling, and binning." units="cycles" equation="(ITER_TILER_ACTIVE) - (ITER_TILER_READY_BLOCKED)" />
|
||||
<event name="MaliBinningQueueAssignStallCy" title="Binning phase queue endpoint stall cycles" description="The number of cycles the binning phase queue is waiting for endpoints to be assigned." units="cycles" counter="ITER_TILER_READY_BLOCKED" offset="70" />
|
||||
<event name="MaliBinningQueueIRQActiveCy" title="Binning phase queue interrupt pending cycles" description="The number of cycles that the binning phase queue IRQ was pending." units="cycles" counter="ITER_TILER_IRQ_ACTIVE" offset="68" />
|
||||
<event name="MaliBinningQueueJob" title="Binning phase jobs" description="The number of binning phase jobs processed." units="jobs" counter="ITER_TILER_JOB_COMPLETED" offset="65" />
|
||||
<event name="MaliBinningQueueTask" title="Binning phase tasks" description="The number of binning phase tasks processed." units="tasks" counter="ITER_TILER_IDVS_TASK_COMPLETED" offset="66" />
|
||||
<event name="MaliBinningQueueUtil" title="Binning phase queue utilization" description="The binning phase queue utilization compared against the GPU active cycles. The binning phase includes position shading, culling, and binning." units="percent" equation="(((ITER_TILER_ACTIVE) - (ITER_TILER_READY_BLOCKED)) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliBinningQueuedCy" title="Binning phase work queued cycles" description="The number of cycles that the binning phase queue has work queued. The binning phase includes position shading, culling, and binning." units="cycles" counter="ITER_TILER_ACTIVE" offset="64" />
|
||||
<event name="MaliCS0WaitStallCy" title="Command stream 0 wait stall cycles" description="The number of cycles that command stream interface 0 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF0_WAIT_BLOCKED" offset="84" />
|
||||
<event name="MaliCS1WaitStallCy" title="Command stream 1 wait stall cycles" description="The number of cycles that command stream interface 1 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF1_WAIT_BLOCKED" offset="90" />
|
||||
<event name="MaliCS2WaitStallCy" title="Command stream 2 wait stall cycles" description="The number of cycles that command stream interface 2 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF2_WAIT_BLOCKED" offset="96" />
|
||||
<event name="MaliCS3WaitStallCy" title="Command stream 3 wait stall cycles" description="The number of cycles that command stream interface 3 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF3_WAIT_BLOCKED" offset="102" />
|
||||
<event name="MaliCS4WaitStallCy" title="Command stream 4 wait stall cycles" description="The number of cycles that command stream interface 4 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF4_WAIT_BLOCKED" offset="108" />
|
||||
<event name="MaliCS5WaitStallCy" title="Command stream 5 wait stall cycles" description="The number of cycles that command stream interface 5 was blocked because of a scheduling dependency." units="cycles" counter="CSHWIF5_WAIT_BLOCKED" offset="114" />
|
||||
<event name="MaliCSDoorbellIRQCy" title="Command stream doorbell interrupt pending cycles" description="The number of cycles that the command stream doorbell has an IRQ pending." units="cycles" counter="DOORBELL_IRQ_ACTIVE" offset="14" />
|
||||
<event name="MaliCSFCEUActiveCy" title="Command execution unit active cycles" description="The number of cycles that the CEU is processing commands." units="cycles" counter="CEU_ACTIVE" offset="16" />
|
||||
<event name="MaliCSFCEUUtil" title="Command execution unit utilization" description="The CSF command execution unit utilization compared against the GPU active cycles." units="percent" equation="((CEU_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCSFCS0ActiveCy" title="Command stream 0 active cycles" description="The number of cycles that command stream interface 0 contained an enabled command stream." units="cycles" counter="CSHWIF0_ENABLED" offset="80" />
|
||||
<event name="MaliCSFCS1ActiveCy" title="Command stream 1 active cycles" description="The number of cycles that command stream interface 1 contained an enabled command stream." units="cycles" counter="CSHWIF1_ENABLED" offset="86" />
|
||||
<event name="MaliCSFCS2ActiveCy" title="Command stream 2 active cycles" description="The number of cycles that command stream interface 2 contained an enabled command stream." units="cycles" counter="CSHWIF2_ENABLED" offset="92" />
|
||||
<event name="MaliCSFCS3ActiveCy" title="Command stream 3 active cycles" description="The number of cycles that command stream interface 3 contained an enabled command stream." units="cycles" counter="CSHWIF3_ENABLED" offset="98" />
|
||||
<event name="MaliCSFCS4ActiveCy" title="Command stream 4 active cycles" description="The number of cycles that command stream interface 4 contained an enabled command stream." units="cycles" counter="CSHWIF4_ENABLED" offset="104" />
|
||||
<event name="MaliCSFCS5ActiveCy" title="Command stream 5 active cycles" description="The number of cycles that command stream interface 5 contained an enabled command stream." units="cycles" counter="CSHWIF5_ENABLED" offset="110" />
|
||||
<event name="MaliCSFLSUActiveCy" title="Command load/store unit active cycles" description="The number of cycles that the load-store unit is processing commands." units="cycles" counter="LSU_ACTIVE" offset="21" />
|
||||
<event name="MaliCSFLSUUtil" title="Command load/store unit utilization" description="The CSF load/store unit utilization compared against the GPU active cycles." units="percent" equation="((LSU_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCSFMCUActiveCy" title="MCU active cycles" description="The number of cycles when the CSF front-end MCU is actively processing." units="cycles" counter="MCU_ACTIVE" offset="5" />
|
||||
<event name="MaliCSFMCUUtil" title="Microcontroller utilization" description="The CSF MCU utilization compared against the GPU active cycles." units="percent" equation="((MCU_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCompQueueActiveCy" title="Compute queue active cycles" description="The number of cycles that the compute queue is processing work." units="cycles" equation="(ITER_COMP_ACTIVE) - (ITER_COMP_READY_BLOCKED)" />
|
||||
<event name="MaliCompQueueAssignStallCy" title="Compute queue endpoint stall cycles" description="The number of cycles the compute queue is waiting for endpoints to be assigned." units="cycles" counter="ITER_COMP_READY_BLOCKED" offset="38" />
|
||||
<event name="MaliCompQueueDrainStallCy" title="Compute queue endpoint drain stall cycles" description="The number of cycles the compute queue is waiting for endpoints to drain." units="cycles" counter="ITER_COMP_EP_DRAIN" offset="39" />
|
||||
<event name="MaliCompQueueIRQActiveCy" title="Compute queue interrupt pending cycles" description="The number of cycles that the compute queue IRQ was pending." units="cycles" counter="ITER_COMP_IRQ_ACTIVE" offset="36" />
|
||||
<event name="MaliCompQueueJob" title="Compute jobs" description="The number of compute jobs processed." units="jobs" counter="ITER_COMP_JOB_COMPLETED" offset="33" />
|
||||
<event name="MaliCompQueueTask" title="Compute tasks" description="The number of compute tasks processed." units="tasks" counter="ITER_COMP_TASK_COMPLETED" offset="34" />
|
||||
<event name="MaliCompQueueUtil" title="Compute queue utilization" description="The compute queue utilization compared against the GPU active cycles." units="percent" equation="(((ITER_COMP_ACTIVE) - (ITER_COMP_READY_BLOCKED)) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliCompQueuedCy" title="Compute work queued cycles" description="The number of cycles that the compute queue has work queued." units="cycles" counter="ITER_COMP_ACTIVE" offset="32" />
|
||||
<event name="MaliGPUActiveCy" title="GPU active cycles" description="The number of cycles when the GPU has a workload of any type queued for processing." units="cycles" counter="GPU_ACTIVE" offset="4" />
|
||||
<event name="MaliGPUAnyQueueActiveCy" title="Any queue active cycles" description="The number of cycles when any GPU queue is active." units="cycles" counter="GPU_ITER_ACTIVE" offset="6" />
|
||||
<event name="MaliGPUIRQ" title="GPU interrupts" description="The number of interrupts raised by the GPU to the CPU." units="interrupts" counter="GPU_IRQ_COUNT" offset="11" />
|
||||
<event name="MaliGPUIRQActiveCy" title="GPU interrupt pending cycles" description="The number of cycles when the GPU has a pending interrupt." units="cycles" counter="IRQ_ACTIVE" offset="10" />
|
||||
<event name="MaliGPUIRQUtil" title="Interrupt pending utilization" description="The IRQ pending utilization compared against the GPU active cycles." units="percent" equation="((IRQ_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliMainQueueActiveCy" title="Main phase queue active cycles" description="The number of cycles that the main phase queue is processing work. The main phase includes deferred vertex shading and all fragment shading." units="cycles" equation="(ITER_FRAG_ACTIVE) - (ITER_FRAG_READY_BLOCKED)" />
|
||||
<event name="MaliMainQueueAssignStallCy" title="Main phase queue endpoint stall cycles" description="The number of cycles the main phase queue is waiting for endpoints to be assigned." units="cycles" counter="ITER_FRAG_READY_BLOCKED" offset="54" />
|
||||
<event name="MaliMainQueueIRQActiveCy" title="Main phase queue interrupt pending cycles" description="The number of cycles that the main phase queue IRQ was pending." units="cycles" counter="ITER_FRAG_IRQ_ACTIVE" offset="52" />
|
||||
<event name="MaliMainQueueJob" title="Main phase jobs" description="The number of main phase jobs processed." units="jobs" counter="ITER_FRAG_JOB_COMPLETED" offset="49" />
|
||||
<event name="MaliMainQueueTask" title="Main phase tasks" description="The number of main phase tasks processed." units="tasks" counter="ITER_FRAG_TASK_COMPLETED" offset="50" />
|
||||
<event name="MaliMainQueueUtil" title="Main phase queue utilization" description="The main phase queue utilization compared against the GPU active cycles. The main phase includes deferred vertex shading and all fragment shading." units="percent" equation="(((ITER_FRAG_ACTIVE) - (ITER_FRAG_READY_BLOCKED)) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliMainQueuedCy" title="Main phase work queued cycles" description="The number of cycles that the main phase queue has work queued. The main phase includes deferred vertex shading and all fragment shading." units="cycles" counter="ITER_FRAG_ACTIVE" offset="48" />
|
||||
<event name="MaliTilerQueueDrainStallCy" title="Binning phase queue endpoint drain stall cycles" description="The number of cycles the binning phase queue is waiting for endpoints to drain." units="cycles" counter="ITER_TILER_EP_DRAIN" offset="71" />
|
||||
</category>
|
||||
<category name="Memory System">
|
||||
<event name="MaliExtBusRd" title="Output external read transactions" description="The number of external read transactions." units="transactions" counter="L2_EXT_READ" offset="29" />
|
||||
<event name="MaliExtBusRdBt" title="Output external read beats" description="The number of external bus data read cycles." units="beats" counter="L2_EXT_READ_BEATS" offset="32" />
|
||||
<event name="MaliExtBusRdBy" title="Output external read bytes" description="The total output read bandwidth for the GPU." units="bytes" equation="(L2_EXT_READ_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusRdLat0" title="Output external read latency 0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" counter="L2_EXT_RRESP_0_127" offset="37" />
|
||||
<event name="MaliExtBusRdLat128" title="Output external read latency 128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" counter="L2_EXT_RRESP_128_191" offset="38" />
|
||||
<event name="MaliExtBusRdLat192" title="Output external read latency 192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" counter="L2_EXT_RRESP_192_255" offset="39" />
|
||||
<event name="MaliExtBusRdLat256" title="Output external read latency 256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" counter="L2_EXT_RRESP_256_319" offset="40" />
|
||||
<event name="MaliExtBusRdLat320" title="Output external read latency 320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" counter="L2_EXT_RRESP_320_383" offset="41" />
|
||||
<event name="MaliExtBusRdLat384" title="Output external read latency 384+ cycles" description="The number of read beats that are returned at least 384 cycles after the transaction started." units="beats" equation="(L2_EXT_READ_BEATS) - (L2_EXT_RRESP_0_127) - (L2_EXT_RRESP_128_191) - (L2_EXT_RRESP_192_255) - (L2_EXT_RRESP_256_319) - (L2_EXT_RRESP_320_383)" />
|
||||
<event name="MaliExtBusRdNoSnoop" title="Output external ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" counter="L2_EXT_READ_NOSNP" offset="30" />
|
||||
<event name="MaliExtBusRdOTQ1" title="Output external outstanding reads 0-25%" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q1" offset="34" />
|
||||
<event name="MaliExtBusRdOTQ2" title="Output external outstanding reads 25-50%" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q2" offset="35" />
|
||||
<event name="MaliExtBusRdOTQ3" title="Output external outstanding reads 50-75%" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q3" offset="36" />
|
||||
<event name="MaliExtBusRdOTQ4" title="Output external outstanding reads 75-100%" description="The number of read transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_READ) - (L2_EXT_AR_CNT_Q1) - (L2_EXT_AR_CNT_Q2) - (L2_EXT_AR_CNT_Q3)" />
|
||||
<event name="MaliExtBusRdStallCy" title="Output external read stall cycles" description="The number of cycles when a read is stalled waiting for the external bus." units="cycles" counter="L2_EXT_AR_STALL" offset="33" />
|
||||
<event name="MaliExtBusRdStallRate" title="Output external read stall percentage" description="The percentage of cycles with an external read transaction stalled." units="percent" equation="((L2_EXT_AR_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliExtBusRdUnique" title="Output external ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" counter="L2_EXT_READ_UNIQUE" offset="31" />
|
||||
<event name="MaliExtBusWr" title="Output external write transactions" description="The number of external write transactions." units="transactions" counter="L2_EXT_WRITE" offset="42" />
|
||||
<event name="MaliExtBusWrBt" title="Output external write beats" description="The number of external bus data write cycles." units="beats" counter="L2_EXT_WRITE_BEATS" offset="47" />
|
||||
<event name="MaliExtBusWrBy" title="Output external write bytes" description="The total output write bandwidth for the GPU." units="bytes" equation="(L2_EXT_WRITE_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusWrNoSnoopFull" title="Output external WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_FULL" offset="43" />
|
||||
<event name="MaliExtBusWrNoSnoopPart" title="Output external WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_PTL" offset="44" />
|
||||
<event name="MaliExtBusWrOTQ1" title="Output external outstanding writes 0-25%" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q1" offset="49" />
|
||||
<event name="MaliExtBusWrOTQ2" title="Output external outstanding writes 25-50%" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q2" offset="50" />
|
||||
<event name="MaliExtBusWrOTQ3" title="Output external outstanding writes 50-75%" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q3" offset="51" />
|
||||
<event name="MaliExtBusWrOTQ4" title="Output external outstanding writes 75-100%" description="The number of write transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_WRITE) - (L2_EXT_AW_CNT_Q1) - (L2_EXT_AW_CNT_Q2) - (L2_EXT_AW_CNT_Q3)" />
|
||||
<event name="MaliExtBusWrSnoopFull" title="Output external WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_FULL" offset="45" />
|
||||
<event name="MaliExtBusWrSnoopPart" title="Output external WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_PTL" offset="46" />
|
||||
<event name="MaliExtBusWrStallCy" title="Output external write stall cycles" description="The number of cycles when a write is stalled waiting for the external bus." units="cycles" counter="L2_EXT_W_STALL" offset="48" />
|
||||
<event name="MaliExtBusWrStallRate" title="Output external write stall percentage" description="The percentage of cycles with an external write transaction stalled." units="percent" equation="((L2_EXT_W_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliL2CacheCleanUnique" title="Input internal clean unique requests" description="The number of L2 cache line clean unique requests from internal requesters." units="requests" counter="L2_RD_MSG_IN_CU" offset="13" />
|
||||
<event name="MaliL2CacheEvict" title="Input internal evict requests" description="The number of L2 cache line evict requests from internal requesters." units="requests" counter="L2_RD_MSG_IN_EVICT" offset="12" />
|
||||
<event name="MaliL2CacheFlush" title="L2 cache flush requests" description="The number of GPU L2 cache flushes performed." units="requests" counter="CACHE_FLUSH" offset="13" />
|
||||
<event name="MaliL2CacheFlushCy" title="L2 cache flush cycles" description="The number of cycles spent flushing GPU L2 caches." units="cycles" counter="CACHE_FLUSH_CYCLES" offset="12" />
|
||||
<event name="MaliL2CacheIncSnp" title="Input external snoop transactions" description="The number of coherency snoops triggered by external requesters." units="transactions" counter="L2_EXT_SNOOP" offset="52" />
|
||||
<event name="MaliL2CacheIncSnpStallCy" title="Input external snoop stall cycles" description="The number of cycles when a coherency snoop triggered by an external requester is stalled." units="cycles" counter="L2_EXT_SNOOP_STALL" offset="53" />
|
||||
<event name="MaliL2CacheL1Rd" title="Output internal read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal requester." units="requests" counter="L2_RD_MSG_OUT" offset="22" />
|
||||
<event name="MaliL2CacheL1RdStallCy" title="Output internal read stall cycles" description="The number of cycles when L1 cache read requests sent by the L2 cache to an internal requester are stalled." units="cycles" counter="L2_RD_MSG_OUT_STALL" offset="23" />
|
||||
<event name="MaliL2CacheL1Wr" title="Output internal write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal requester." units="requests" counter="L2_WR_MSG_OUT" offset="24" />
|
||||
<event name="MaliL2CacheLookup" title="Any lookup requests" description="The number of L2 cache lookups performed." units="requests" counter="L2_ANY_LOOKUP" offset="25" />
|
||||
<event name="MaliL2CacheRd" title="Input internal read requests" description="The number of L2 cache read requests from internal requesters." units="requests" counter="L2_RD_MSG_IN" offset="16" />
|
||||
<event name="MaliL2CacheRdLookup" title="Read lookup requests" description="The number of L2 cache read lookups performed." units="requests" counter="L2_READ_LOOKUP" offset="26" />
|
||||
<event name="MaliL2CacheRdMissRate" title="L2 cache read miss percentage" description="The percentage of internal L2 cache reads that result in an external read." units="percent" equation="((L2_EXT_READ) / (L2_READ_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheRdStallCy" title="Input internal read stall cycles" description="The number of cycles L2 cache read requests from internal requesters are stalled." units="cycles" counter="L2_RD_MSG_IN_STALL" offset="17" />
|
||||
<event name="MaliL2CacheSnp" title="Input internal snoop requests" description="The number of L2 snoop requests from internal requesters." units="requests" counter="L2_SNP_MSG_IN" offset="20" />
|
||||
<event name="MaliL2CacheSnpLookup" title="Input external snoop lookup requests" description="The number of coherency snoop lookups performed that were triggered by an external requester." units="requests" counter="L2_EXT_SNOOP_LOOKUP" offset="28" />
|
||||
<event name="MaliL2CacheSnpStallCy" title="Input internal snoop stall cycles" description="The number of cycles when L2 cache snoop requests from internal requesters are stalled." units="cycles" counter="L2_SNP_MSG_IN_STALL" offset="21" />
|
||||
<event name="MaliL2CacheWr" title="Input internal write requests" description="The number of L2 cache write requests from internal requesters." units="requests" counter="L2_WR_MSG_IN" offset="18" />
|
||||
<event name="MaliL2CacheWrLookup" title="Write lookup requests" description="The number of L2 cache write lookups performed." units="requests" counter="L2_WRITE_LOOKUP" offset="27" />
|
||||
<event name="MaliL2CacheWrMissRate" title="L2 cache write miss percentage" description="The percentage of internal L2 cache writes that result in an external write." units="percent" equation="((L2_EXT_WRITE) / (L2_WRITE_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheWrStallCy" title="Input internal write stall cycles" description="The number of cycles when L2 cache write requests from internal requesters are stalled." units="cycles" counter="L2_WR_MSG_IN_STALL" offset="19" />
|
||||
<event name="MaliMMUL2Hit" title="MMU L2 lookup TLB hits" description="The number of level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L2" offset="8" />
|
||||
<event name="MaliMMUL2Rd" title="MMU L2 table read requests" description="The number of level 2 translation table reads." units="requests" counter="MMU_TABLE_READS_L2" offset="6" />
|
||||
<event name="MaliMMUL3Hit" title="MMU L3 lookup TLB hits" description="The number of level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L3" offset="7" />
|
||||
<event name="MaliMMUL3Rd" title="MMU L3 table read requests" description="The number of level 3 translation table reads." units="requests" counter="MMU_TABLE_READS_L3" offset="5" />
|
||||
<event name="MaliMMULookup" title="MMU lookup requests" description="The number of main MMU address translations performed." units="requests" counter="MMU_REQUESTS" offset="4" />
|
||||
</category>
|
||||
<category name="Shader Core">
|
||||
<event name="MaliALUIssueCy" title="Arithmetic unit issue cycles" description="The number of cycles the arithmetic unit was busy." units="cycles" equation="max((((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU)) - (EXEC_INSTR_SLOT_1)), (EXEC_INSTR_SLOT_1), (EXEC_INSTR_SFU) * 4)" />
|
||||
<event name="MaliALUUtil" title="Arithmetic unit utilization" description="The percentage utilization of the arithmetic unit." units="percent" equation="((max((((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU)) - (EXEC_INSTR_SLOT_1)), (EXEC_INSTR_SLOT_1), (EXEC_INSTR_SFU) * 4)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliAnyActiveCy" title="Any workload active cycles" description="The number of cycles when the shader core is processing either a non-fragment workload or a fragment workload." units="cycles" counter="SHADER_CORE_ACTIVE" offset="53" />
|
||||
<event name="MaliAnyUtil" title="Shader core clock ratio" description="An estimate of shader core use relative to the GPU top-level clock." units="percent" equation="((SHADER_CORE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliAttrInstr" title="Attribute instructions" description="The number of instructions run by the attribute unit." units="instructions" counter="ATTR_INSTR" offset="52" />
|
||||
<event name="MaliCompOrBinningActiveCy" title="Compute or binning phase active cycles" description="The number of cycles when the shader core is processing some compute or binning phase workload." units="cycles" counter="COMPUTE_ACTIVE" offset="22" />
|
||||
<event name="MaliCompOrBinningUtil" title="Compute or binning phase utilization" description="The utilization of the shader core compute or binning phase path." units="percent" equation="((COMPUTE_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliCoreActiveCy" title="Execution core active cycles" description="The number of cycles when the shader core is processing at least one warp." units="cycles" counter="EXEC_CORE_ACTIVE" offset="26" />
|
||||
<event name="MaliCoreAllRegsWarp" title="Warps using more than 32 registers" description="The number of warps that require more than 32 registers." units="warps" counter="WARP_REG_SIZE_64" offset="17" />
|
||||
<event name="MaliCoreAllRegsWarpRate" title="All registers warp percentage" description="The percentage of warps that require more than 32 registers." units="percent" equation="((WARP_REG_SIZE_64) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreFragWarpOcc" title="Fragment warp occupancy" description="The thread occupancy of the fragment warps." units="percent" equation="((FRAG_SHADER_THREADS) / ((FRAG_WARPS) * (16))) * 100" />
|
||||
<event name="MaliCoreFullWarp" title="Full warps" description="The number of warps that have a full thread slot allocation." units="warps" counter="FULL_QUAD_WARPS" offset="21" />
|
||||
<event name="MaliCoreFullWarpRate" title="Full warp percentage" description="The percentage of warps that have a full thread slot allocation." units="percent" equation="((FULL_QUAD_WARPS) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreUtil" title="Execution core utilization" description="The utilization of the programmable shader core." units="percent" equation="((EXEC_CORE_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliDefVertWarp" title="Deferred vertex warps" description="The number of deferred vertex warps created." units="warps" counter="DVS_WARPS" offset="106" />
|
||||
<event name="MaliEngArithInstr" title="Arithmetic instruction issue cycles" description="The total number of instructions issued to the FMA, CVT, and SFU pipes." units="instructions" equation="(EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU)" />
|
||||
<event name="MaliEngAttrBackpressureCy" title="Attribute unit backpressure cycles" description="The number of cycles new work could not be sent to the attribute unit." units="cycles" counter="EXEC_MSG_STALLED_ATTR" offset="117" />
|
||||
<event name="MaliEngAttrBackpressureRate" title="Attribute unit backpressure percentage" description="The percentage of cycles new work could not be sent to the attribute unit." units="percent" equation="((EXEC_MSG_STALLED_ATTR) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngBlendBackpressureCy" title="Blend unit backpressure cycles" description="The number of cycles new work could not be sent to the blend unit." units="cycles" counter="EXEC_MSG_STALLED_BLEND" offset="114" />
|
||||
<event name="MaliEngBlendBackpressureRate" title="Blend unit backpressure percentage" description="The percentage of cycles new work could not be sent to the blend unit." units="percent" equation="((EXEC_MSG_STALLED_BLEND) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngCVTInstr" title="Arithmetic CVT pipe instructions" description="The number of instructions issued to the CVT pipe." units="instructions" counter="EXEC_INSTR_CVT" offset="28" />
|
||||
<event name="MaliEngCVTPipeUtil" title="CVT pipe utilization" description="The utilization of the CVT pipes." units="percent" equation="((EXEC_INSTR_CVT) / (2 * (EXEC_CORE_ACTIVE))) * 100" />
|
||||
<event name="MaliEngDivergedInstr" title="Diverged instructions" description="The number of instructions run per warp that have control flow divergence." units="instructions" counter="EXEC_INSTR_DIVERGED" offset="31" />
|
||||
<event name="MaliEngDivergedInstrRate" title="Warp divergence percentage" description="The percentage of instructions that have control flow divergence across the warp." units="percent" equation="((EXEC_INSTR_DIVERGED) / ((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU))) * 100" />
|
||||
<event name="MaliEngFMAInstr" title="Arithmetic FMA pipe instructions" description="The number of instructions issued to the FMA pipe." units="instructions" counter="EXEC_INSTR_FMA" offset="27" />
|
||||
<event name="MaliEngFMAPipeUtil" title="FMA pipe utilization" description="The utilization of the FMA pipes." units="percent" equation="((EXEC_INSTR_FMA) / (2 * (EXEC_CORE_ACTIVE))) * 100" />
|
||||
<event name="MaliEngICacheMiss" title="Instruction cache misses" description="The number of instruction cache misses." units="requests" counter="EXEC_ICACHE_MISS" offset="32" />
|
||||
<event name="MaliEngLSBackpressureCy" title="Load/store unit backpressure cycles" description="The number of cycles new work could not be sent to the load/store unit." units="cycles" counter="EXEC_MSG_STALLED_LSC" offset="116" />
|
||||
<event name="MaliEngLSBackpressureRate" title="Load/store unit backpressure percentage" description="The percentage of cycles new work could not be sent to the load/store unit." units="percent" equation="((EXEC_MSG_STALLED_LSC) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngNarrowInstr" title="Narrow arithmetic instructions" description="The number of narrow arithmetic instructions." units="instructions" counter="EXEC_INSTR_NARROW" offset="5" />
|
||||
<event name="MaliEngNarrowInstrRate" title="Narrow arithmetic percentage" description="The percentage of arithmetic instructions that operate on 8/16-bit types." units="percent" equation="((EXEC_INSTR_NARROW) / ((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU))) * 100" />
|
||||
<event name="MaliEngSFUInstr" title="Arithmetic SFU pipe instructions" description="The number of instructions issued to the SFU pipe." units="instructions" counter="EXEC_INSTR_SFU" offset="29" />
|
||||
<event name="MaliEngSFUPipeUtil" title="SFU pipe utilization" description="The utilization of the SFU pipe." units="percent" equation="(((EXEC_INSTR_SFU) * 4) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngSWBlendInstr" title="Blend shader instructions" description="The number of blend shader invocations run." units="instructions" counter="CALL_BLEND_SHADER" offset="34" />
|
||||
<event name="MaliEngSWBlendRate" title="Shader blend percentage" description="The percentage of fragments that use shader-based blending." units="percent" equation="(((CALL_BLEND_SHADER) * 4) / (FRAG_WARPS)) * 100" />
|
||||
<event name="MaliEngSlot0IssueCy" title="Slot 0 arithmetic issue cycles" description="The number of arithmetic issue cycles to slot 0." units="cycles" equation="((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU)) - (EXEC_INSTR_SLOT_1)" />
|
||||
<event name="MaliEngSlot1IssueCy" title="Slot 1 arithmetic issue cycles" description="The number of arithmetic issue cycles to slot 1." units="cycles" counter="EXEC_INSTR_SLOT_1" offset="118" />
|
||||
<event name="MaliEngSlotAnyIssueCy" title="Any slot arithmetic issue cycles" description="The number of arithmetic issue cycles that issue to either issue slot." units="cycles" counter="EXEC_ISSUE_SLOT_ANY" offset="119" />
|
||||
<event name="MaliEngStarveCy" title="Processing unit starvation cycles" description="The number of cycles when the processing unit is starved of work." units="cycles" counter="EXEC_STARVE_ARITH" offset="33" />
|
||||
<event name="MaliEngTexBackpressureCy" title="Texture unit backpressure cycles" description="The number of cycles new work could not be sent to the texture unit." units="cycles" counter="EXEC_MSG_STALLED_TEX" offset="112" />
|
||||
<event name="MaliEngTexBackpressureRate" title="Texture unit backpressure percentage" description="The percentage of cycles new work could not be sent to the texture unit." units="percent" equation="((EXEC_MSG_STALLED_TEX) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngVarBackpressureCy" title="Varying unit backpressure cycles" description="The number of cycles new work could not be sent to the varying unit." units="cycles" counter="EXEC_MSG_STALLED_VARY" offset="113" />
|
||||
<event name="MaliEngVarBackpressureRate" title="Varying unit backpressure percentage" description="The percentage of cycles new work could not be sent to the varying unit." units="percent" equation="((EXEC_MSG_STALLED_VARY) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngZSBackpressureCy" title="ZS unit backpressure cycles" description="The number of cycles new work could not be sent to the depth/stencil test unit." units="cycles" counter="EXEC_MSG_STALLED_ZS" offset="115" />
|
||||
<event name="MaliEngZSBackpressureRate" title="ZS unit backpressure percentage" description="The percentage of cycles new work could not be sent to the depth/stencil test unit." units="percent" equation="((EXEC_MSG_STALLED_ZS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliFragEZSKillQd" title="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_KILL" offset="14" />
|
||||
<event name="MaliFragEZSTestQd" title="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_TEST" offset="12" />
|
||||
<event name="MaliFragEZSUpdateQd" title="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing that are capable of updating the framebuffer." units="quads" counter="FRAG_QUADS_EZS_UPDATE" offset="13" />
|
||||
<event name="MaliFragFPKActiveCy" title="Fragment pre-pipe buffer active cycles" description="The number of cycles when at least one quad is present in the pre-pipe quad queue." units="cycles" counter="FRAG_FPK_ACTIVE" offset="7" />
|
||||
<event name="MaliFragFPKBUtil" title="Fragment pre-pipe buffer utilization" description="The percentage of cycles when at least one quad is buffered for fragment shading." units="percent" equation="((FRAG_FPK_ACTIVE) / (FRAG_ACTIVE)) * 100" />
|
||||
<event name="MaliFragInputPrim" title="Input fragment primitives" description="The number of unique primitives loaded by the fragment front-end." units="primitives" equation="((FRAG_PRIMITIVES_OUT) + (FRAG_PRIMITIVES_HSR_CULLED)) - (FRAG_PRIMITIVES_OUT_PRE_PASS)" />
|
||||
<event name="MaliFragLZSKillQd" title="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" counter="FRAG_LZS_KILL" offset="16" />
|
||||
<event name="MaliFragLZSKillRate" title="Late ZS killed quad percentage" description="The percentage of rasterized quads that are killed by late depth and stencil testing." units="percent" equation="((FRAG_LZS_KILL) / (4 * (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliFragLZSTestQd" title="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" counter="FRAG_LZS_TEST" offset="15" />
|
||||
<event name="MaliFragLZSTestRate" title="Late ZS tested quad percentage" description="The percentage of rasterized quads that are tested by late depth and stencil testing." units="percent" equation="((FRAG_LZS_TEST) / (4 * (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliFragMainPassStallCy" title="Fragment main pass stall cycles" description="The number of cycles when the fragment main pass is stalled waiting for prepass results." units="cycles" counter="FRAG_MAIN_PASS_STALLED_BY_PRE_PASS" offset="105" />
|
||||
<event name="MaliFragMainPassStallRate" title="Fragment main pass stall percentage" description="The percentage of cycles when the fragment main pass is stalled by the fragment prepass." units="percent" equation="((FRAG_MAIN_PASS_STALLED_BY_PRE_PASS) / (FRAG_ACTIVE)) * 100" />
|
||||
<event name="MaliFragMainThread" title="Fragment main pass threads" description="The number of fragment threads started in the main pass." units="threads" equation="((FRAG_WARPS) - (FRAG_WARPS_PRE_PASS)) * (16)" />
|
||||
<event name="MaliFragOpaqueQd" title="Occluding quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" counter="QUAD_FPK_KILLER" offset="20" />
|
||||
<event name="MaliFragOverdraw" title="Fragments per pixel" description="The number of fragments shaded per output pixel." units="threads" equation="(FRAG_SHADER_THREADS) / ((ITER_FRAG_TASK_COMPLETED) * (64) * (64))" />
|
||||
<event name="MaliFragPrepassCullPrim" title="Fragment prepass culled primitives" description="The number of primitives culled by the fragment prepass." units="primitives" counter="FRAG_PRIMITIVES_HSR_CULLED" offset="98" />
|
||||
<event name="MaliFragPrepassCullPrimRate" title="Fragment prepass primitive culling percentage" description="The percentage of primitives culled by the fragment prepass." units="percent" equation="((FRAG_PRIMITIVES_HSR_CULLED) / (((FRAG_PRIMITIVES_OUT) + (FRAG_PRIMITIVES_HSR_CULLED)) - (FRAG_PRIMITIVES_OUT_PRE_PASS))) * 100" />
|
||||
<event name="MaliFragPrepassEZSUpdateQd" title="Fragment prepass early ZS updated quads" description="The number of quads that update the fragment prepass using early ZS." units="quads" counter="FRAG_QUADS_HSR_BUF_EZS_UPDATE" offset="101" />
|
||||
<event name="MaliFragPrepassKillQd" title="Fragment prepass killed quads" description="The number of quads that are killed by the fragment prepass." units="quads" counter="FRAG_QUADS_HSR_BUF_KILLED" offset="103" />
|
||||
<event name="MaliFragPrepassKillRate" title="Fragment prepass killed quad percentage" description="The percentage of tested quads that are killed by the fragment prepass." units="percent" equation="((FRAG_QUADS_HSR_BUF_KILLED) / (FRAG_QUADS_HSR_BUF_TEST)) * 100" />
|
||||
<event name="MaliFragPrepassPrim" title="Loaded fragment prepass primitives" description="The number of primitives loaded by the fragment front-end for the fragment prepass." units="primitives" counter="FRAG_PRIMITIVES_OUT_PRE_PASS" offset="99" />
|
||||
<event name="MaliFragPrepassPrimRate" title="Fragment prepass primitive percentage" description="The percentage of primitives processed by fragment prepass hidden surface removal." units="percent" equation="((FRAG_PRIMITIVES_OUT_PRE_PASS) / (((FRAG_PRIMITIVES_OUT) + (FRAG_PRIMITIVES_HSR_CULLED)) - (FRAG_PRIMITIVES_OUT_PRE_PASS))) * 100" />
|
||||
<event name="MaliFragPrepassSkipPrimRate" title="Fragment prepass skipped primitive percentage" description="The percentage of primitives that are skipped by the fragment prepass." units="percent" equation="((FRAG_PRIMITIVES_HSR_DISABLED) / (((FRAG_PRIMITIVES_OUT) + (FRAG_PRIMITIVES_HSR_CULLED)) - (FRAG_PRIMITIVES_OUT_PRE_PASS))) * 100" />
|
||||
<event name="MaliFragPrepassSkippedPrim" title="Fragment prepass skipped primitives" description="The number of primitives that are skipped by the fragment prepass." units="primitives" counter="FRAG_PRIMITIVES_HSR_DISABLED" offset="100" />
|
||||
<event name="MaliFragPrepassTestQd" title="Fragment prepass tested quads" description="The number of quads that are tested by the fragment prepass." units="quads" counter="FRAG_QUADS_HSR_BUF_TEST" offset="102" />
|
||||
<event name="MaliFragPrepassThread" title="Fragment prepass threads" description="The number of fragment threads started in the prepass." units="threads" equation="(FRAG_WARPS_PRE_PASS) * (16)" />
|
||||
<event name="MaliFragPrepassWarp" title="Fragment prepass warps" description="The number of fragment prepass warps created." units="warps" counter="FRAG_WARPS_PRE_PASS" offset="104" />
|
||||
<event name="MaliFragPrepassWarpRate" title="Fragment prepass warp percentage" description="The percentage of warps being processed by the fragment prepass." units="percent" equation="((FRAG_WARPS_PRE_PASS) / ((FRAG_WARPS) - (FRAG_WARPS_PRE_PASS))) * 100" />
|
||||
<event name="MaliFragPrim" title="Loaded fragment primitives" description="The number of primitives loaded by the fragment front-end." units="primitives" counter="FRAG_PRIMITIVES_OUT" offset="97" />
|
||||
<event name="MaliFragRastCoarseQd" title="Rasterized coarse quads" description="The number of coarse quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_COARSE" offset="68" />
|
||||
<event name="MaliFragRastPartQd" title="Partial rasterized fine quads" description="The number of rasterized fine quads created with partial coverage." units="quads" counter="FRAG_PARTIAL_QUADS_RAST" offset="10" />
|
||||
<event name="MaliFragRastPartQdRate" title="Partial coverage percentage" description="The percentage of rasterized fine quads that contain samples with no coverage." units="percent" equation="((FRAG_PARTIAL_QUADS_RAST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragRastPrim" title="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" counter="FRAG_PRIM_RAST" offset="6" />
|
||||
<event name="MaliFragRastQd" title="Rasterized fine quads" description="The number of fine quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_RAST" offset="11" />
|
||||
<event name="MaliFragShadRate" title="Fragment shading rate" description="The percentage of coarse quads generated relative to fine quads rasterized." units="percent" equation="((FRAG_QUADS_COARSE) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragShadedQd" title="Shaded coarse quads" description="The number of 2x2 fragment quads that are fragment shaded." units="quads" equation="((FRAG_WARPS) * (16)) / 4" />
|
||||
<event name="MaliFragThread" title="All fragment threads" description="The number of fragment threads started in the prepass and main pass." units="threads" counter="FRAG_SHADER_THREADS" offset="69" />
|
||||
<event name="MaliFragThroughputCy" title="Average cycles per fragment thread" description="The average number of shader core cycles per fragment thread." units="cycles" equation="(FRAG_ACTIVE) / (((FRAG_WARPS) - (FRAG_WARPS_PRE_PASS)) * (16))" />
|
||||
<event name="MaliFragTile" title="Tiles" description="The number of tiles processed by the shader core." units="tiles" counter="FRAG_PTILES" offset="18" />
|
||||
<event name="MaliFragTileKill" title="Killed unchanged tiles" description="The number of tiles killed by transaction elimination." units="tiles" counter="FRAG_TRANS_ELIM" offset="19" />
|
||||
<event name="MaliFragTileKillRate" title="Unchanged tile kill percentage" description="The percentage of tiles that are killed by transaction elimination." units="percent" equation="((FRAG_TRANS_ELIM) / (4 * (FRAG_PTILES))) * 100" />
|
||||
<event name="MaliFragWarp" title="Fragment warps" description="The number of fragment warps created." units="warps" counter="FRAG_WARPS" offset="9" />
|
||||
<event name="MaliLSAtomic" title="Load/store unit atomic issues" description="The number of load/store atomic accesses." units="cycles" counter="LS_MEM_ATOMIC" offset="48" />
|
||||
<event name="MaliLSFullRd" title="Load/store unit full read issues" description="The number of full-width load/store cache reads." units="cycles" counter="LS_MEM_READ_FULL" offset="44" />
|
||||
<event name="MaliLSFullWr" title="Load/store unit full write issues" description="The number of full-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_FULL" offset="46" />
|
||||
<event name="MaliLSIssueCy" title="Load/store unit issue cycles" description="The total number of load/store issue cycles." units="cycles" equation="((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)" />
|
||||
<event name="MaliLSPartRd" title="Load/store unit partial read issues" description="The number of partial-width load/store cache reads." units="cycles" counter="LS_MEM_READ_SHORT" offset="45" />
|
||||
<event name="MaliLSPartWr" title="Load/store unit partial write issues" description="The number of partial-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_SHORT" offset="47" />
|
||||
<event name="MaliLSRdCy" title="Load/store unit read issues" description="The total number of load/store read cycles." units="cycles" equation="(LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)" />
|
||||
<event name="MaliLSUtil" title="Load/store unit utilization" description="The percentage utilization of the load/store unit." units="percent" equation="((((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliLSWrCy" title="Load/store unit write issues" description="The total number of load/store write cycles." units="cycles" equation="(LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)" />
|
||||
<event name="MaliMainActiveCy" title="Main phase active cycles" description="The number of cycles when the shader core is processing a main phase workload." units="cycles" counter="FRAG_ACTIVE" offset="4" />
|
||||
<event name="MaliMainUtil" title="Main phase utilization" description="The utilization of the shader core main phase path." units="percent" equation="((FRAG_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragTask" title="Non-main phase core tasks" description="The number of non-main phase tasks issued to the shader core." units="tasks" counter="COMPUTE_TASKS" offset="23" />
|
||||
<event name="MaliNonFragThread" title="Non-fragment threads" description="The number of non-fragment threads started." units="threads" equation="(COMPUTE_WARPS) * (16)" />
|
||||
<event name="MaliNonFragThroughputCy" title="Average cycles per non-fragment thread" description="The average number of shader core cycles per non-fragment thread." units="cycles" equation="(COMPUTE_ACTIVE) / ((COMPUTE_WARPS) * (16))" />
|
||||
<event name="MaliNonFragWarp" title="Non-fragment warps" description="The number of non-fragment warps created." units="warps" counter="COMPUTE_WARPS" offset="24" />
|
||||
<event name="MaliRTUBox" title="Ray tracing box tests" description="The number of acceleration structure bounding boxes tested." units="boxes" counter="RT_RAY_BOX" offset="71" />
|
||||
<event name="MaliRTUBoxBin1" title="Ray tracing box nodes with 1-4 rays" description="The number of acceleration structure box nodes with 1 to 4 active rays in the warp." units="nodes" counter="RT_RAY_BOX_BIN_1_4" offset="76" />
|
||||
<event name="MaliRTUBoxBin13" title="Ray tracing box nodes with 13-16 rays" description="The number of acceleration structure box nodes with 13 to 16 active rays in the warp." units="nodes" counter="RT_RAY_BOX_BIN_13_16" offset="79" />
|
||||
<event name="MaliRTUBoxBin5" title="Ray tracing box nodes with 5-8 rays" description="The number of acceleration structure box nodes with 5 to 8 active rays in the warp." units="nodes" counter="RT_RAY_BOX_BIN_5_8" offset="77" />
|
||||
<event name="MaliRTUBoxBin9" title="Ray tracing box nodes with 9-12 rays" description="The number of acceleration structure box nodes with 9 to 12 active rays in the warp." units="nodes" counter="RT_RAY_BOX_BIN_9_12" offset="78" />
|
||||
<event name="MaliRTUBoxIssueCy" title="Ray tracing box tester issue cycles" description="The number of active issue cycles for the ray tracing box test unit." units="cycles" counter="RT_RAY_BOX_ISSUED" offset="85" />
|
||||
<event name="MaliRTUFirstHitTerm" title="Ray tracing first hit terminations" description="The number of rays that terminate on their first hit." units="rays" counter="RT_TERM_FIRST_HIT" offset="82" />
|
||||
<event name="MaliRTUIssueCy" title="Ray tracing unit issue cycles" description="The number of cycles the ray tracing unit was issuing work." units="cycles" equation="max((RT_RAY_BOX_ISSUED), (RT_RAY_TRI_ISSUED))" />
|
||||
<event name="MaliRTUMiss" title="Ray tracing triangle test misses" description="The number of triangle intersection tests that do not intersect." units="rays" counter="RT_MISS" offset="83" />
|
||||
<event name="MaliRTUNonOpaqueHit" title="Ray tracing non-opaque triangle hits" description="The number of non-opaque triangle hits." units="tests" counter="RT_NON_OPAQUE_HIT" offset="81" />
|
||||
<event name="MaliRTUOpaqueHit" title="Ray tracing opaque triangle hits" description="The number of opaque triangle hits." units="tests" counter="RT_OPAQUE_HIT" offset="80" />
|
||||
<event name="MaliRTURay" title="Ray tracing started rays" description="The number of rays started." units="rays" counter="RT_RAYS_STARTED" offset="84" />
|
||||
<event name="MaliRTUTri" title="Ray tracing triangle nodes tested" description="The number of triangle nodes tested." units="nodes" counter="RT_RAY_TRI" offset="70" />
|
||||
<event name="MaliRTUTriBin1" title="Ray tracing triangle nodes with 1-4 rays" description="The number of triangle nodes with 1 to 4 active rays in the warp." units="nodes" counter="RT_RAY_TRI_BIN_1_4" offset="72" />
|
||||
<event name="MaliRTUTriBin13" title="Ray tracing triangle nodes with 13-16 rays" description="The number of triangle nodes with 13 to 16 active rays in the warp." units="nodes" counter="RT_RAY_TRI_BIN_13_16" offset="75" />
|
||||
<event name="MaliRTUTriBin5" title="Ray tracing triangle nodes with 5-8 rays" description="The number of triangle nodes with 5 to 8 active rays in the warp." units="nodes" counter="RT_RAY_TRI_BIN_5_8" offset="73" />
|
||||
<event name="MaliRTUTriBin9" title="Ray tracing triangle nodes with 9-12 rays" description="The number of triangle nodes with 9 to 12 active rays in the warp." units="nodes" counter="RT_RAY_TRI_BIN_9_12" offset="74" />
|
||||
<event name="MaliRTUTriIssueCy" title="Ray tracing triangle tester issue cycles" description="The number of active issue cycles for the ray tracing triangle test unit." units="cycles" counter="RT_RAY_TRI_ISSUED" offset="86" />
|
||||
<event name="MaliRTUUtil" title="Ray tracing unit utilization" description="The percentage utilization of the ray tracing unit." units="percent" equation="((max((RT_RAY_BOX_ISSUED), (RT_RAY_TRI_ISSUED))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliSCBusFFEExtRdBt" title="Fragment front-end read beats from external memory" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_FTC_EXT" offset="55" />
|
||||
<event name="MaliSCBusFFEExtRdBy" title="Fragment front-end read bytes from external memory" description="The total number of bytes read from the external memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC_EXT) * (16)" />
|
||||
<event name="MaliSCBusFFEL2RdBt" title="Fragment front-end read beats from L2 cache" description="The number of read beats received by the fixed-function fragment front-end." units="beats" counter="BEATS_RD_FTC" offset="54" />
|
||||
<event name="MaliSCBusFFEL2RdBy" title="Fragment front-end read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdBt" title="Load/store unit read beats from external memory" description="The number of read beats received by the load/store unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_LSC_EXT" offset="57" />
|
||||
<event name="MaliSCBusLSExtRdBy" title="Load/store unit read bytes from external memory" description="The total number of bytes read from the external memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC_EXT) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdByPerRd" title="Load/store unit bytes read from external memory per access cycle" description="The average number of bytes read from the external memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC_EXT) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSL2RdBt" title="Load/store unit read beats from L2 cache" description="The number of read beats received by the load/store unit." units="beats" counter="BEATS_RD_LSC" offset="56" />
|
||||
<event name="MaliSCBusLSL2RdBy" title="Load/store unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSL2RdByPerRd" title="Load/store unit bytes read from L2 per access cycle" description="The average number of bytes read from the L2 memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSOtherWrBt" title="Load/store unit other write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of any reason other than write-back." units="beats" counter="BEATS_WR_LSC_OTHER" offset="61" />
|
||||
<event name="MaliSCBusLSWBWrBt" title="Load/store unit write-back write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of write-back." units="beats" counter="BEATS_WR_LSC_WB" offset="63" />
|
||||
<event name="MaliSCBusLSWrBt" title="Load/store unit write beats to L2 memory system" description="The number of write beats sent by the load/store unit." units="beats" equation="(BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)" />
|
||||
<event name="MaliSCBusLSWrBy" title="Load/store unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the load/store unit." units="bytes" equation="((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)" />
|
||||
<event name="MaliSCBusLSWrByPerWr" title="Load/store unit bytes written to L2 per access cycle" description="The average number of bytes written to the L2 memory system by the load/store unit per write cycle." units="bytes" equation="(((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)) / ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT))" />
|
||||
<event name="MaliSCBusOtherL2RdBt" title="Miscellaneous read beats from L2 cache" description="The number of read beats received by a unit that is not specifically identified." units="beats" counter="BEATS_RD_OTHER" offset="60" />
|
||||
<event name="MaliSCBusTexExtRdBt" title="Texture unit read beats from external memory" description="The number of read beats received by the texture unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_TEX_EXT" offset="59" />
|
||||
<event name="MaliSCBusTexExtRdBy" title="Texture unit read bytes from external memory" description="The total number of bytes read from the external memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX_EXT) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdByPerRd" title="Texture unit bytes read from external memory per texture cycle" description="The average number of bytes read from the external memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX_EXT) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTexL2RdBt" title="Texture unit read beats from L2 cache" description="The number of read beats received by the texture unit." units="beats" counter="BEATS_RD_TEX" offset="58" />
|
||||
<event name="MaliSCBusTexL2RdBy" title="Texture unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX) * (16)" />
|
||||
<event name="MaliSCBusTexL2RdByPerRd" title="Texture unit bytes read from L2 per texture cycle" description="The average number of bytes read from the L2 memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTileWrBPerPx" title="Tile unit bytes written to L2 per pixel" description="The average number of bytes written to the L2 memory system by the tile unit per output pixel." units="bytes" equation="((BEATS_WR_TIB) * (16)) / ((ITER_FRAG_TASK_COMPLETED) * (64) * (64))" />
|
||||
<event name="MaliSCBusTileWrBt" title="Tile unit write beats to L2 memory system" description="The number of write beats sent by the tile write-back unit." units="beats" counter="BEATS_WR_TIB" offset="62" />
|
||||
<event name="MaliSCBusTileWrBy" title="Tile unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the tile write-back unit." units="bytes" equation="(BEATS_WR_TIB) * (16)" />
|
||||
<event name="MaliTexCPI" title="Texture filtering cycles per instruction" description="The average number of texture filtering cycles per instruction." units="cycles" equation="(max((TEX_FILT_NUM_OPERATIONS), (TEX_TFCH_NUM_TCL_OPERATIONS), (TEX_CFCH_NUM_DIRECT_PATH_OPERATIONS), (TEX_CFCH_NUM_RP_OPERATIONS), (TEX_MSGI_NUM_FLITS), (TEX_MSGO_NUM_FLITS), (TEX_CFCH_NUM_L1_CL_OPERATIONS), (TEX_CFCH_NUM_L1_CT_OPERATIONS), (TEX_TIDX_NUM_OPERATIONS))) / ((((TEX_MSGO_NUM_MSG) * 2) - (TEX_MSGO_NUM_SINGLE_QUAD_MSG)) * 4)" />
|
||||
<event name="MaliTexCacheComplexLoadCy" title="Complex texture load cycles" description="The number of cycles loading complex texture formats." units="cycles" counter="TEX_CFCH_NUM_RP_OPERATIONS" offset="93" />
|
||||
<event name="MaliTexCacheLookupCy" title="Texture cache lookup cycles" description="The number of cycles returning data from the texture cache." units="cycles" counter="TEX_TFCH_NUM_TCL_OPERATIONS" offset="92" />
|
||||
<event name="MaliTexCacheSimpleLoadCy" title="Simple texture load cycles" description="The number of cycles loading simple texture formats." units="cycles" counter="TEX_CFCH_NUM_DIRECT_PATH_OPERATIONS" offset="88" />
|
||||
<event name="MaliTexClkActiveCy" title="Texture unit clock active cycles" description="The number of cycles the texture unit was active." units="cycles" counter="TEX_TEXP_CLK_ACTIVE" offset="96" />
|
||||
<event name="MaliTexClkStarvedCy" title="Texture causing starvation cycles" description="The number of cycles the texture unit was active but did not return a texture sample." units="cycles" counter="TEX_MSGI_CLK_STARVED" offset="95" />
|
||||
<event name="MaliTexDataFetchStallCy" title="Texture fetch stall cycles" description="The number of cycles when a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles" counter="TEX_TFCH_CLK_STALLED" offset="37" />
|
||||
<event name="MaliTexDescStallCy" title="Texture descriptor stall cycles" description="The number of cycles when a quad is stalled on texture descriptor fetch." units="cycles" counter="TEX_DFCH_CLK_STALLED" offset="36" />
|
||||
<event name="MaliTexFiltIssueCy" title="Texture filtering cycles" description="The number of texture filtering issue cycles." units="cycles" counter="TEX_FILT_NUM_OPERATIONS" offset="39" />
|
||||
<event name="MaliTexFiltStallCy" title="Texture filtering stall cycles" description="The number of cycles when the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" offset="38" />
|
||||
<event name="MaliTexInBt" title="Texture message read beats" description="The number of texture request message data beats." units="beats" counter="TEX_MSGI_NUM_FLITS" offset="35" />
|
||||
<event name="MaliTexInBusUtil" title="Texture input bus utilization" description="The percentage load on the texture message input bus." units="percent" equation="((TEX_MSGI_NUM_FLITS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexIndexCy" title="Texture index calculation cycles" description="The number of cycles computing texel index values." units="cycles" counter="TEX_TIDX_NUM_OPERATIONS" offset="94" />
|
||||
<event name="MaliTexIssueCy" title="Texture unit issue cycles" description="The number of cycles the texture unit was busy." units="cycles" equation="max((TEX_FILT_NUM_OPERATIONS), (TEX_TFCH_NUM_TCL_OPERATIONS), (TEX_CFCH_NUM_DIRECT_PATH_OPERATIONS), (TEX_CFCH_NUM_RP_OPERATIONS), (TEX_MSGI_NUM_FLITS), (TEX_MSGO_NUM_FLITS), (TEX_CFCH_NUM_L1_CL_OPERATIONS), (TEX_CFCH_NUM_L1_CT_OPERATIONS), (TEX_TIDX_NUM_OPERATIONS))" />
|
||||
<event name="MaliTexL1CacheLoadCy" title="L1 texture cache load cycles" description="The number of cycles the L1 cache is being loaded." units="cycles" counter="TEX_CFCH_NUM_OUTPUT_OPERATIONS" offset="87" />
|
||||
<event name="MaliTexL1CacheLookupCy" title="L1 texture cache lookup cycles" description="The number of cycles the L1 cache is being accessed." units="cycles" counter="TEX_CFCH_NUM_L1_CT_OPERATIONS" offset="90" />
|
||||
<event name="MaliTexL1CacheOutputCy" title="L1 texture cache output cycles" description="The number of cycles the L1 cache is returning data." units="cycles" counter="TEX_CFCH_NUM_L1_CL_OPERATIONS" offset="89" />
|
||||
<event name="MaliTexOutBt" title="Texture message write beats" description="The number of texture response message data beats." units="beats" counter="TEX_MSGO_NUM_FLITS" offset="43" />
|
||||
<event name="MaliTexOutBusUtil" title="Texture output bus utilization" description="The percentage load on the texture message output bus." units="percent" equation="((TEX_MSGO_NUM_FLITS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexOutMsg" title="Texture messages" description="The number of output texture messages from the texture unit." units="issues" counter="TEX_MSGO_NUM_MSG" offset="42" />
|
||||
<event name="MaliTexOutSingleMsg" title="Texture messages with single quad" description="The number of single quad output texture messages from the texture unit." units="issues" counter="TEX_MSGO_NUM_SINGLE_QUAD_MSG" offset="91" />
|
||||
<event name="MaliTexQuads" title="Texture quads" description="The number of quad-width texture operations processed by the texture unit." units="quads" equation="((TEX_MSGO_NUM_MSG) * 2) - (TEX_MSGO_NUM_SINGLE_QUAD_MSG)" />
|
||||
<event name="MaliTexSample" title="Texture samples" description="The number of texture samples made." units="requests" equation="(((TEX_MSGO_NUM_MSG) * 2) - (TEX_MSGO_NUM_SINGLE_QUAD_MSG)) * 4" />
|
||||
<event name="MaliTexUtil" title="Texture unit utilization" description="The percentage utilization of the texturing unit." units="percent" equation="((max((TEX_FILT_NUM_OPERATIONS), (TEX_TFCH_NUM_TCL_OPERATIONS), (TEX_CFCH_NUM_DIRECT_PATH_OPERATIONS), (TEX_CFCH_NUM_RP_OPERATIONS), (TEX_MSGI_NUM_FLITS), (TEX_MSGO_NUM_FLITS), (TEX_CFCH_NUM_L1_CL_OPERATIONS), (TEX_CFCH_NUM_L1_CT_OPERATIONS), (TEX_TIDX_NUM_OPERATIONS))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliVar16IssueCy" title="16-bit interpolation issue cycles" description="The number of 16-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_16) / (4)" />
|
||||
<event name="MaliVar16IssueSlot" title="16-bit interpolation slots" description="The number of 16-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_16" offset="51" />
|
||||
<event name="MaliVar32IssueCy" title="32-bit interpolation issue cycles" description="The number of 32-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_32) / (4)" />
|
||||
<event name="MaliVar32IssueSlot" title="32-bit interpolation slots" description="The number of 32-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_32" offset="50" />
|
||||
<event name="MaliVarInstr" title="Varying unit instructions" description="The number of warp-width interpolation operations processed by the varying unit." units="requests" counter="VARY_INSTR" offset="49" />
|
||||
<event name="MaliVarIssueCy" title="Varying unit issue cycles" description="The total number of varying unit issue cycles." units="cycles" equation="((VARY_SLOT_32) / (4)) + ((VARY_SLOT_16) / (4))" />
|
||||
<event name="MaliVarUtil" title="Varying unit utilization" description="The percentage utilization of the varying unit." units="percent" equation="((((VARY_SLOT_32) / (4)) + ((VARY_SLOT_16) / (4))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
</category>
|
||||
<category name="Tiler">
|
||||
<event name="MaliGeomBackFacePrim" title="Visible back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" counter="BACK_FACING" offset="10" />
|
||||
<event name="MaliGeomFaceCullPrim" title="Facing test culled primitives" description="The number of primitives that are culled by facing tests." units="primitives" counter="PRIM_FACE_CULLED" offset="12" />
|
||||
<event name="MaliGeomFaceCullRate" title="Facing plane test cull percentage" description="The percentage of primitives culled by the facing test." units="percent" equation="((PRIM_FACE_CULLED) / ((((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE)) - (PRIM_FRUSTUM_CULLED) - (PRIM_SCISSOR_CULLED))) * 100" />
|
||||
<event name="MaliGeomFrontFacePrim" title="Visible front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" counter="FRONT_FACING" offset="9" />
|
||||
<event name="MaliGeomLinePrim" title="Line primitives" description="The number of input line primitives." units="primitives" counter="LINES" offset="7" />
|
||||
<event name="MaliGeomPlaneCullPrim" title="Frustum test culled primitives" description="The number of primitives that are culled by frustum tests." units="primitives" counter="PRIM_FRUSTUM_CULLED" offset="13" />
|
||||
<event name="MaliGeomPlaneCullRate" title="Frustum test cull percentage" description="The percentage of primitives culled by the frustum test." units="percent" equation="((PRIM_FRUSTUM_CULLED) / (((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomPointPrim" title="Point primitives" description="The number of input point primitives." units="primitives" counter="POINTS" offset="8" />
|
||||
<event name="MaliGeomPosShadPartTask" title="Partial tiler position shading requests" description="The number of partial position shading requests in the tiler geometry flow." units="requests" counter="POS_SHADER_PARTIAL_WARPS" offset="22" />
|
||||
<event name="MaliGeomPosShadTask" title="Tiler position shading requests" description="The number of position shading requests in the tiler geometry flow." units="requests" counter="IDVS_POS_SHAD_REQ" offset="21" />
|
||||
<event name="MaliGeomPosShadThread" title="Position shader thread invocations" description="The number of position shader thread invocations." units="threads" equation="(IDVS_POS_SHAD_REQ) * (16)" />
|
||||
<event name="MaliGeomPosShadThreadPerPrim" title="Position threads per input primitive" description="The number of position shader invocations per input primitive." units="threads" equation="((IDVS_POS_SHAD_REQ) * (16)) / (((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE))" />
|
||||
<event name="MaliGeomSampleCullPrim" title="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" counter="PRIM_SAT_CULLED" offset="14" />
|
||||
<event name="MaliGeomSampleCullRate" title="Sample test cull percentage" description="The percentage of primitives culled by the sample coverage test." units="percent" equation="((PRIM_SAT_CULLED) / ((((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE)) - (PRIM_FRUSTUM_CULLED) - (PRIM_SCISSOR_CULLED) - (PRIM_FACE_CULLED))) * 100" />
|
||||
<event name="MaliGeomScissorCullPrim" title="Scissor test culled primitives" description="The number of primitives that are culled by the scissor test." units="primitives" counter="PRIM_SCISSOR_CULLED" offset="70" />
|
||||
<event name="MaliGeomScissorCullRate" title="Scissor test cull percentage" description="The percentage of primitives culled by the scissor test." units="percent" equation="((PRIM_SCISSOR_CULLED) / ((((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE)) - (PRIM_FRUSTUM_CULLED))) * 100" />
|
||||
<event name="MaliGeomTotalCullPrim" title="Culled primitives" description="The number of primitives that were culled during the rendering process." units="primitives" equation="(PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)" />
|
||||
<event name="MaliGeomTotalPrim" title="Total input primitives" description="The total number of input primitives to the rendering process." units="primitives" equation="((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomTrianglePrim" title="Triangle primitives" description="The number of input triangle primitives." units="primitives" counter="TRIANGLES" offset="6" />
|
||||
<event name="MaliGeomVarShadPartTask" title="Partial tiler varying shading requests" description="The number of partial varying shading requests in the tiler geometry flow." units="requests" counter="VAR_SHADER_PARTIAL_WARPS" offset="37" />
|
||||
<event name="MaliGeomVarShadTask" title="Tiler varying shading requests" description="The number of varying shading requests in the tiler geometry flow." units="requests" counter="IDVS_VAR_SHAD_REQ" offset="36" />
|
||||
<event name="MaliGeomVarShadThread" title="Varying shader thread invocations" description="The number of varying shader thread invocations." units="threads" equation="(IDVS_VAR_SHAD_REQ) * (16)" />
|
||||
<event name="MaliGeomVisibleDVSPrim" title="Visible primitives using DVS" description="The number of primitives using DVS that are visible after culling." units="primitives" counter="PRIM_VISIBLE_DVS" offset="71" />
|
||||
<event name="MaliGeomVisiblePrim" title="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" counter="PRIM_VISIBLE" offset="11" />
|
||||
<event name="MaliGeomVisibleRate" title="Visible primitive percentage" description="The percentage of primitives that are visible after culling." units="percent" equation="((PRIM_VISIBLE) / (((PRIM_FACE_CULLED) + (PRIM_FRUSTUM_CULLED) + (PRIM_SAT_CULLED) + (PRIM_SCISSOR_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliTilerActiveCy" title="Tiler active cycles" description="The number of cycles when the tiler has a workload queued for processing." units="cycles" counter="TILER_ACTIVE" offset="4" />
|
||||
<event name="MaliTilerPosCacheHit" title="Position cache hit requests" description="The number of position lookups that result in a hit in the vertex cache." units="requests" counter="VCACHE_HIT" offset="26" />
|
||||
<event name="MaliTilerPosShadFIFOFullCy" title="Tiler position FIFO full cycles" description="The number of cycles when the tiler has a stalled position shading buffer." units="cycles" counter="IDVS_POS_FIFO_FULL" offset="24" />
|
||||
<event name="MaliTilerPosShadStallCy" title="Tiler position shading stall cycles" description="The number of cycles when the tiler has a stalled position shading request." units="cycles" counter="IDVS_POS_SHAD_STALL" offset="23" />
|
||||
<event name="MaliTilerPrimAsPosShadStallCy" title="Primitive assembly position shading stall cycles" description="The number of cycles when primitive assembly is waiting for position shading." units="cycles" counter="PRIMASSY_POS_SHADER_WAIT" offset="64" />
|
||||
<event name="MaliTilerRdBt" title="Output internal read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" counter="BUS_READ" offset="17" />
|
||||
<event name="MaliTilerUtil" title="Tiler utilization" description="The percentage of GPU active cycles when the tiler has a workload queued for processing." units="percent" equation="((TILER_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliTilerVarCacheHit" title="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" counter="IDVS_VBU_HIT" offset="34" />
|
||||
<event name="MaliTilerVarShadStallCy" title="Tiler varying shading stall cycles" description="The number of cycles when the tiler has a stalled varying shading request." units="cycles" counter="IDVS_VAR_SHAD_STALL" offset="38" />
|
||||
</category>
|
||||
</metrics>
|
||||
251
src/panfrost/perf/generated/G76.xml
Normal file
251
src/panfrost/perf/generated/G76.xml
Normal file
|
|
@ -0,0 +1,251 @@
|
|||
<!--
|
||||
Copyright (c) 2026 Arm, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
Generated from libGPUCounters @ 4ea7c0127fe2942a00e4a1123bb62c625a401f93.
|
||||
https://github.com/ARM-software/libGPUCounters
|
||||
which is:
|
||||
Copyright (c) 2023-2025 Arm Limited
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
|
||||
<metrics id="G76">
|
||||
<category name="GPU Front-end">
|
||||
<event name="MaliFragQueueActiveCy" title="Fragment queue active cycles" description="The number of cycles when work is queued for processing in the GPU fragment queue." units="cycles" counter="JS0_ACTIVE" offset="10" />
|
||||
<event name="MaliFragQueueJob" title="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" counter="JS0_JOBS" offset="8" />
|
||||
<event name="MaliFragQueueTask" title="Fragment tasks" description="The number of fragment tasks processed." units="tasks" counter="JS0_TASKS" offset="9" />
|
||||
<event name="MaliFragQueueUtil" title="Fragment queue utilization" description="The fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS0_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragQueueWaitDepCy" title="Fragment queue job dependency wait cycles" description="The number of cycles when queued fragment work is waiting for dependent work to complete." units="cycles" counter="JS0_WAIT_DEPEND" offset="14" />
|
||||
<event name="MaliFragQueueWaitFinishCy" title="Fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued fragment work to complete." units="cycles" counter="JS0_WAIT_FINISH" offset="15" />
|
||||
<event name="MaliFragQueueWaitFlushCy" title="Fragment queue cache flush wait cycles" description="The number of cycles when queued fragment work is waiting for a cache flush." units="cycles" counter="JS0_WAIT_FLUSH" offset="11" />
|
||||
<event name="MaliFragQueueWaitIssueCy" title="Fragment queue job issue wait cycles" description="The number of cycles when queued fragment work is waiting for an available processor." units="cycles" counter="JS0_WAIT_ISSUE" offset="13" />
|
||||
<event name="MaliFragQueueWaitRdCy" title="Fragment queue job descriptor read wait cycles" description="The number of cycles when queued fragment work is waiting for a descriptor load." units="cycles" counter="JS0_WAIT_READ" offset="12" />
|
||||
<event name="MaliGPUActiveCy" title="GPU active cycles" description="The number of cycles when the GPU has a workload of any type queued for processing." units="cycles" counter="GPU_ACTIVE" offset="6" />
|
||||
<event name="MaliGPUIRQActiveCy" title="GPU interrupt pending cycles" description="The number of cycles when the GPU has a pending interrupt." units="cycles" counter="IRQ_ACTIVE" offset="7" />
|
||||
<event name="MaliGPUIRQUtil" title="Interrupt pending utilization" description="The IRQ pending utilization compared against the GPU active cycles." units="percent" equation="((IRQ_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueActiveCy" title="Non-fragment queue active cycles" description="The number of cycles when work is queued in the GPU non-fragment queue." units="cycles" counter="JS1_ACTIVE" offset="18" />
|
||||
<event name="MaliNonFragQueueJob" title="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" counter="JS1_JOBS" offset="16" />
|
||||
<event name="MaliNonFragQueueTask" title="Non-fragment tasks" description="The number of non-fragment tasks processed." units="tasks" counter="JS1_TASKS" offset="17" />
|
||||
<event name="MaliNonFragQueueUtil" title="Non-fragment queue utilization" description="The non-fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS1_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueWaitDepCy" title="Non-fragment queue job dependency wait cycles" description="The number of cycles when queued non-fragment work is waiting for dependent work to complete." units="cycles" counter="JS1_WAIT_DEPEND" offset="22" />
|
||||
<event name="MaliNonFragQueueWaitFinishCy" title="Non-fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued non-fragment work to complete." units="cycles" counter="JS1_WAIT_FINISH" offset="23" />
|
||||
<event name="MaliNonFragQueueWaitFlushCy" title="Non-fragment queue cache flush wait cycles" description="The number of cycles when queued non-fragment work is waiting for a cache flush." units="cycles" counter="JS1_WAIT_FLUSH" offset="19" />
|
||||
<event name="MaliNonFragQueueWaitIssueCy" title="Non-fragment queue job issue wait cycles" description="The number of cycles when queued non-fragment work is waiting for an available processor." units="cycles" counter="JS1_WAIT_ISSUE" offset="21" />
|
||||
<event name="MaliNonFragQueueWaitRdCy" title="Non-fragment queue job descriptor read wait cycles" description="The number of cycles when queued non-fragment work is waiting for a descriptor load." units="cycles" counter="JS1_WAIT_READ" offset="20" />
|
||||
<event name="MaliResQueueActiveCy" title="Reserved active cycles" description="The number of cycles when work is queued in the GPU reserved queue." units="cycles" counter="JS2_ACTIVE" offset="26" />
|
||||
<event name="MaliResQueueJob" title="Reserved queue jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" counter="JS2_JOBS" offset="24" />
|
||||
<event name="MaliResQueueTask" title="Reserved queue tasks" description="The number of reserved tasks processed." units="tasks" counter="JS2_TASKS" offset="25" />
|
||||
<event name="MaliResQueueWaitDepCy" title="Reserved queue job dependency wait cycles" description="The number of cycles when queued reserved work is waiting for dependent work to complete." units="cycles" counter="JS2_WAIT_DEPEND" offset="30" />
|
||||
<event name="MaliResQueueWaitFinishCy" title="Reserved queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued reserved work to complete." units="cycles" counter="JS2_WAIT_FINISH" offset="31" />
|
||||
<event name="MaliResQueueWaitFlushCy" title="Reserved queue cache flush wait cycles" description="The number of cycles when queued reserved work is waiting for a cache flush." units="cycles" counter="JS2_WAIT_FLUSH" offset="27" />
|
||||
<event name="MaliResQueueWaitIssueCy" title="Reserved queue job issue wait cycles" description="The number of cycles when queued reserved work is waiting for an available processor." units="cycles" counter="JS2_WAIT_ISSUE" offset="29" />
|
||||
<event name="MaliResQueueWaitRdCy" title="Reserved queue job descriptor read wait cycles" description="The number of cycles when queued reserved work is waiting for a descriptor load." units="cycles" counter="JS2_WAIT_READ" offset="28" />
|
||||
</category>
|
||||
<category name="Memory System">
|
||||
<event name="MaliExtBusRd" title="Output external read transactions" description="The number of external read transactions." units="transactions" counter="L2_EXT_READ" offset="29" />
|
||||
<event name="MaliExtBusRdBt" title="Output external read beats" description="The number of external bus data read cycles." units="beats" counter="L2_EXT_READ_BEATS" offset="32" />
|
||||
<event name="MaliExtBusRdBy" title="Output external read bytes" description="The total output read bandwidth for the GPU." units="bytes" equation="(L2_EXT_READ_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusRdLat0" title="Output external read latency 0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" counter="L2_EXT_RRESP_0_127" offset="37" />
|
||||
<event name="MaliExtBusRdLat128" title="Output external read latency 128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" counter="L2_EXT_RRESP_128_191" offset="38" />
|
||||
<event name="MaliExtBusRdLat192" title="Output external read latency 192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" counter="L2_EXT_RRESP_192_255" offset="39" />
|
||||
<event name="MaliExtBusRdLat256" title="Output external read latency 256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" counter="L2_EXT_RRESP_256_319" offset="40" />
|
||||
<event name="MaliExtBusRdLat320" title="Output external read latency 320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" counter="L2_EXT_RRESP_320_383" offset="41" />
|
||||
<event name="MaliExtBusRdLat384" title="Output external read latency 384+ cycles" description="The number of read beats that are returned at least 384 cycles after the transaction started." units="beats" equation="(L2_EXT_READ_BEATS) - (L2_EXT_RRESP_0_127) - (L2_EXT_RRESP_128_191) - (L2_EXT_RRESP_192_255) - (L2_EXT_RRESP_256_319) - (L2_EXT_RRESP_320_383)" />
|
||||
<event name="MaliExtBusRdNoSnoop" title="Output external ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" counter="L2_EXT_READ_NOSNP" offset="30" />
|
||||
<event name="MaliExtBusRdOTQ1" title="Output external outstanding reads 0-25%" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q1" offset="34" />
|
||||
<event name="MaliExtBusRdOTQ2" title="Output external outstanding reads 25-50%" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q2" offset="35" />
|
||||
<event name="MaliExtBusRdOTQ3" title="Output external outstanding reads 50-75%" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q3" offset="36" />
|
||||
<event name="MaliExtBusRdOTQ4" title="Output external outstanding reads 75-100%" description="The number of read transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_READ) - (L2_EXT_AR_CNT_Q1) - (L2_EXT_AR_CNT_Q2) - (L2_EXT_AR_CNT_Q3)" />
|
||||
<event name="MaliExtBusRdStallCy" title="Output external read stall cycles" description="The number of cycles when a read is stalled waiting for the external bus." units="cycles" counter="L2_EXT_AR_STALL" offset="33" />
|
||||
<event name="MaliExtBusRdStallRate" title="Output external read stall percentage" description="The percentage of cycles with an external read transaction stalled." units="percent" equation="((L2_EXT_AR_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliExtBusRdUnique" title="Output external ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" counter="L2_EXT_READ_UNIQUE" offset="31" />
|
||||
<event name="MaliExtBusWr" title="Output external write transactions" description="The number of external write transactions." units="transactions" counter="L2_EXT_WRITE" offset="42" />
|
||||
<event name="MaliExtBusWrBt" title="Output external write beats" description="The number of external bus data write cycles." units="beats" counter="L2_EXT_WRITE_BEATS" offset="47" />
|
||||
<event name="MaliExtBusWrBy" title="Output external write bytes" description="The total output write bandwidth for the GPU." units="bytes" equation="(L2_EXT_WRITE_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusWrNoSnoopFull" title="Output external WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_FULL" offset="43" />
|
||||
<event name="MaliExtBusWrNoSnoopPart" title="Output external WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_PTL" offset="44" />
|
||||
<event name="MaliExtBusWrOTQ1" title="Output external outstanding writes 0-25%" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q1" offset="49" />
|
||||
<event name="MaliExtBusWrOTQ2" title="Output external outstanding writes 25-50%" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q2" offset="50" />
|
||||
<event name="MaliExtBusWrOTQ3" title="Output external outstanding writes 50-75%" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q3" offset="51" />
|
||||
<event name="MaliExtBusWrOTQ4" title="Output external outstanding writes 75-100%" description="The number of write transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_WRITE) - (L2_EXT_AW_CNT_Q1) - (L2_EXT_AW_CNT_Q2) - (L2_EXT_AW_CNT_Q3)" />
|
||||
<event name="MaliExtBusWrSnoopFull" title="Output external WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_FULL" offset="45" />
|
||||
<event name="MaliExtBusWrSnoopPart" title="Output external WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_PTL" offset="46" />
|
||||
<event name="MaliExtBusWrStallCy" title="Output external write stall cycles" description="The number of cycles when a write is stalled waiting for the external bus." units="cycles" counter="L2_EXT_W_STALL" offset="48" />
|
||||
<event name="MaliExtBusWrStallRate" title="Output external write stall percentage" description="The percentage of cycles with an external write transaction stalled." units="percent" equation="((L2_EXT_W_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliL2CacheFlush" title="L2 cache flush requests" description="The number of GPU L2 cache flushes performed." units="requests" counter="CACHE_FLUSH" offset="63" />
|
||||
<event name="MaliL2CacheIncSnp" title="Input external snoop transactions" description="The number of coherency snoops triggered by external requesters." units="transactions" counter="L2_EXT_SNOOP" offset="52" />
|
||||
<event name="MaliL2CacheIncSnpStallCy" title="Input external snoop stall cycles" description="The number of cycles when a coherency snoop triggered by an external requester is stalled." units="cycles" counter="L2_EXT_SNOOP_STALL" offset="53" />
|
||||
<event name="MaliL2CacheL1Rd" title="Output internal read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal requester." units="requests" counter="L2_RD_MSG_OUT" offset="22" />
|
||||
<event name="MaliL2CacheL1RdStallCy" title="Output internal read stall cycles" description="The number of cycles when L1 cache read requests sent by the L2 cache to an internal requester are stalled." units="cycles" counter="L2_RD_MSG_OUT_STALL" offset="23" />
|
||||
<event name="MaliL2CacheL1Wr" title="Output internal write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal requester." units="requests" counter="L2_WR_MSG_OUT" offset="24" />
|
||||
<event name="MaliL2CacheLookup" title="Any lookup requests" description="The number of L2 cache lookups performed." units="requests" counter="L2_ANY_LOOKUP" offset="25" />
|
||||
<event name="MaliL2CacheRd" title="Input internal read requests" description="The number of L2 cache read requests from internal requesters." units="requests" counter="L2_RD_MSG_IN" offset="16" />
|
||||
<event name="MaliL2CacheRdLookup" title="Read lookup requests" description="The number of L2 cache read lookups performed." units="requests" counter="L2_READ_LOOKUP" offset="26" />
|
||||
<event name="MaliL2CacheRdMissRate" title="L2 cache read miss percentage" description="The percentage of internal L2 cache reads that result in an external read." units="percent" equation="((L2_EXT_READ) / (L2_READ_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheRdStallCy" title="Input internal read stall cycles" description="The number of cycles when L2 cache read requests from internal requesters are stalled." units="cycles" counter="L2_RD_MSG_IN_STALL" offset="17" />
|
||||
<event name="MaliL2CacheSnp" title="Input internal snoop requests" description="The number of L2 snoop requests from internal requesters." units="requests" counter="L2_SNP_MSG_IN" offset="20" />
|
||||
<event name="MaliL2CacheSnpLookup" title="Input external snoop lookup requests" description="The number of coherency snoop lookups performed that were triggered by an external requester." units="requests" counter="L2_EXT_SNOOP_LOOKUP" offset="28" />
|
||||
<event name="MaliL2CacheSnpStallCy" title="Input internal snoop stall cycles" description="The number of cycles when L2 cache snoop requests from internal requesters are stalled." units="cycles" counter="L2_SNP_MSG_IN_STALL" offset="21" />
|
||||
<event name="MaliL2CacheWr" title="Input internal write requests" description="The number of L2 cache write requests from internal requesters." units="requests" counter="L2_WR_MSG_IN" offset="18" />
|
||||
<event name="MaliL2CacheWrLookup" title="Write lookup requests" description="The number of L2 cache write lookups performed." units="requests" counter="L2_WRITE_LOOKUP" offset="27" />
|
||||
<event name="MaliL2CacheWrMissRate" title="L2 cache write miss percentage" description="The percentage of internal L2 cache writes that result in an external write." units="percent" equation="((L2_EXT_WRITE) / (L2_WRITE_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheWrStallCy" title="Input internal write stall cycles" description="The number of cycles when L2 cache write requests from internal requesters are stalled." units="cycles" counter="L2_WR_MSG_IN_STALL" offset="19" />
|
||||
<event name="MaliMMUL2Hit" title="MMU L2 lookup TLB hits" description="The number of level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L2" offset="8" />
|
||||
<event name="MaliMMUL2Rd" title="MMU L2 table read requests" description="The number of level 2 translation table reads." units="requests" counter="MMU_TABLE_READS_L2" offset="6" />
|
||||
<event name="MaliMMUL3Hit" title="MMU L3 lookup TLB hits" description="The number of level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L3" offset="7" />
|
||||
<event name="MaliMMUL3Rd" title="MMU L3 table read requests" description="The number of level 3 translation table reads." units="requests" counter="MMU_TABLE_READS_L3" offset="5" />
|
||||
<event name="MaliMMULookup" title="MMU lookup requests" description="The number of main MMU address translations performed." units="requests" counter="MMU_REQUESTS" offset="4" />
|
||||
<event name="MaliMMUS2L2Hit" title="MMU stage 2 L2 lookup TLB hits" description="The number of stage 2 level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L2" offset="13" />
|
||||
<event name="MaliMMUS2L2Rd" title="MMU stage 2 L2 lookup requests" description="The number of stage 2 level 2 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L2" offset="11" />
|
||||
<event name="MaliMMUS2L3Hit" title="MMU stage 2 L3 lookup TLB hits" description="The number of stage 2 level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L3" offset="12" />
|
||||
<event name="MaliMMUS2L3Rd" title="MMU stage 2 L3 lookup requests" description="The number of stage 2 level 3 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L3" offset="10" />
|
||||
<event name="MaliMMUS2Lookup" title="MMU stage 2 lookup requests" description="The number of main MMU stage 2 address translations performed." units="requests" counter="MMU_S2_REQUESTS" offset="9" />
|
||||
</category>
|
||||
<category name="Shader Core">
|
||||
<event name="MaliALUIssueCy" title="Arithmetic unit issue cycles" description="The number of cycles the arithmetic unit was busy." units="cycles" equation="(EXEC_INSTR_COUNT)" />
|
||||
<event name="MaliALUUtil" title="Arithmetic unit utilization" description="The percentage utilization of the arithmetic unit." units="percent" equation="(((EXEC_INSTR_COUNT)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliAttrInstr" title="Attribute instructions" description="The number of instructions run by the attribute unit." units="instructions" counter="ATTR_INSTR" offset="52" />
|
||||
<event name="MaliCoreActiveCy" title="Execution core active cycles" description="The number of cycles when the shader core is processing at least one warp." units="cycles" counter="EXEC_CORE_ACTIVE" offset="26" />
|
||||
<event name="MaliCoreAllRegsWarp" title="Warps using more than 32 registers" description="The number of warps that require more than 32 registers." units="warps" counter="WARP_REG_SIZE_64" offset="17" />
|
||||
<event name="MaliCoreAllRegsWarpRate" title="All registers warp percentage" description="The percentage of warps that require more than 32 registers." units="percent" equation="((WARP_REG_SIZE_64) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreFullWarp" title="Full warps" description="The number of warps that have a full thread slot allocation." units="warps" counter="FULL_QUAD_WARPS" offset="21" />
|
||||
<event name="MaliCoreFullWarpRate" title="Full warp percentage" description="The percentage of warps that have a full thread slot allocation." units="percent" equation="((FULL_QUAD_WARPS) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreUtil" title="Execution core utilization" description="The utilization of the programmable shader core." units="percent" equation="((EXEC_CORE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliEngDivergedInstr" title="Diverged instructions" description="The number of instructions run per warp that have control flow divergence." units="instructions" counter="EXEC_INSTR_DIVERGED" offset="29" />
|
||||
<event name="MaliEngDivergedInstrRate" title="Warp divergence percentage" description="The percentage of instructions that have control flow divergence across the warp." units="percent" equation="((EXEC_INSTR_DIVERGED) / (EXEC_INSTR_COUNT)) * 100" />
|
||||
<event name="MaliEngInstr" title="Arithmetic instruction issue cycles" description="The number of instructions run per warp." units="instructions" counter="EXEC_INSTR_COUNT" offset="28" />
|
||||
<event name="MaliEngStarveCy" title="Execution engine starvation cycles" description="The number of cycles when no new threads are available to run." units="cycles" counter="EXEC_INSTR_STARVING" offset="30" />
|
||||
<event name="MaliFragActiveCy" title="Fragment active cycles" description="The number of cycles when the shader core is processing a fragment workload." units="cycles" counter="FRAG_ACTIVE" offset="4" />
|
||||
<event name="MaliFragEZSKillQd" title="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_KILL" offset="14" />
|
||||
<event name="MaliFragEZSKillRate" title="Early ZS killed quad percentage" description="The percentage of rasterized quads that are killed by early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSTestQd" title="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_TEST" offset="12" />
|
||||
<event name="MaliFragEZSTestRate" title="Early ZS tested quad percentage" description="The percentage of rasterized quads that were subjected to early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSUpdateQd" title="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" counter="FRAG_QUADS_EZS_UPDATE" offset="13" />
|
||||
<event name="MaliFragEZSUpdateRate" title="Early ZS updated quad percentage" description="The percentage of rasterized quads that update the framebuffer during early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_UPDATE) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragFPKActiveCy" title="Fragment pre-pipe buffer active cycles" description="The number of cycles when at least one quad is present in the pre-pipe quad queue." units="cycles" counter="FRAG_FPK_ACTIVE" offset="7" />
|
||||
<event name="MaliFragFPKBUtil" title="Fragment pre-pipe buffer utilization" description="The percentage of cycles when at least one quad is buffered for fragment shading." units="percent" equation="((FRAG_FPK_ACTIVE) / (FRAG_ACTIVE)) * 100" />
|
||||
<event name="MaliFragFPKKillQd" title="FPK HSR killed quads" description="The number of quads that are killed by hidden surface removal." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (((FRAG_WARPS) * (8)) / 4)" />
|
||||
<event name="MaliFragFPKKillRate" title="FPK HSR killed quad percentage" description="The percentage of rasterized quads that are killed by hidden surface removal." units="percent" equation="(((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (((FRAG_WARPS) * (8)) / 4)) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSKillQd" title="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" counter="FRAG_LZS_KILL" offset="16" />
|
||||
<event name="MaliFragLZSKillRate" title="Late ZS killed quad percentage" description="The percentage of rasterized quads that are killed by late depth and stencil testing." units="percent" equation="((FRAG_LZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSTestQd" title="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" counter="FRAG_LZS_TEST" offset="15" />
|
||||
<event name="MaliFragLZSTestRate" title="Late ZS tested quad percentage" description="The percentage of rasterized quads that are tested by late depth and stencil testing." units="percent" equation="((FRAG_LZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragOpaqueQd" title="Occluding quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" counter="QUAD_FPK_KILLER" offset="20" />
|
||||
<event name="MaliFragOpaqueQdRate" title="Occluding quad percentage" description="The percentage of quads that are valid occluders for hidden surface removal." units="percent" equation="((QUAD_FPK_KILLER) / ((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL))) * 100" />
|
||||
<event name="MaliFragOverdraw" title="Fragments per pixel" description="The number of fragments shaded per output pixel." units="threads" equation="((FRAG_WARPS) * (8)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliFragPartWarp" title="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps" counter="FRAG_PARTIAL_WARPS" offset="10" />
|
||||
<event name="MaliFragPartWarpRate" title="Partial coverage percentage" description="The percentage of warps that contain samples with no coverage." units="percent" equation="((FRAG_PARTIAL_WARPS) / (FRAG_WARPS)) * 100" />
|
||||
<event name="MaliFragRastPrim" title="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" counter="FRAG_PRIM_RAST" offset="6" />
|
||||
<event name="MaliFragRastQd" title="Rasterized fine quads" description="The number of fine quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_RAST" offset="11" />
|
||||
<event name="MaliFragRdPrim" title="Fragment primitives loaded" description="The number of primitives loaded from the tile list by the fragment front-end." units="primitives" counter="FRAG_PRIMITIVES" offset="5" />
|
||||
<event name="MaliFragShadedQd" title="Shaded coarse quads" description="The number of 2x2 fragment quads that are fragment shaded." units="quads" equation="((FRAG_WARPS) * (8)) / 4" />
|
||||
<event name="MaliFragThread" title="Fragment threads" description="The number of fragment threads started." units="threads" equation="(FRAG_WARPS) * (8)" />
|
||||
<event name="MaliFragThroughputCy" title="Average cycles per fragment thread" description="The average number of shader core cycles per fragment thread." units="cycles" equation="(FRAG_ACTIVE) / ((FRAG_WARPS) * (8))" />
|
||||
<event name="MaliFragTile" title="Tiles" description="The number of tiles processed by the shader core." units="tiles" counter="FRAG_PTILES" offset="18" />
|
||||
<event name="MaliFragTileKill" title="Killed unchanged tiles" description="The number of tiles killed by transaction elimination." units="tiles" counter="FRAG_TRANS_ELIM" offset="19" />
|
||||
<event name="MaliFragTileKillRate" title="Unchanged tile kill percentage" description="The percentage of tiles that are killed by transaction elimination." units="percent" equation="((FRAG_TRANS_ELIM) / (FRAG_PTILES)) * 100" />
|
||||
<event name="MaliFragTransparentQd" title="Non-occluding quads" description="The number of quads that are not eligible to be a hidden surface removal occluder." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (QUAD_FPK_KILLER)" />
|
||||
<event name="MaliFragUtil" title="Fragment utilization" description="The utilization of the shader core fragment path." units="percent" equation="((FRAG_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragWarp" title="Fragment warps" description="The number of fragment warps created." units="warps" counter="FRAG_WARPS" offset="9" />
|
||||
<event name="MaliLSAtomic" title="Load/store unit atomic issues" description="The number of load/store atomic accesses." units="cycles" counter="LS_MEM_ATOMIC" offset="48" />
|
||||
<event name="MaliLSFullRd" title="Load/store unit full read issues" description="The number of full-width load/store cache reads." units="cycles" counter="LS_MEM_READ_FULL" offset="44" />
|
||||
<event name="MaliLSFullWr" title="Load/store unit full write issues" description="The number of full-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_FULL" offset="46" />
|
||||
<event name="MaliLSIssueCy" title="Load/store unit issue cycles" description="The total number of load/store issue cycles." units="cycles" equation="((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)" />
|
||||
<event name="MaliLSPartRd" title="Load/store unit partial read issues" description="The number of partial-width load/store cache reads." units="cycles" counter="LS_MEM_READ_SHORT" offset="45" />
|
||||
<event name="MaliLSPartWr" title="Load/store unit partial write issues" description="The number of partial-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_SHORT" offset="47" />
|
||||
<event name="MaliLSRdCy" title="Load/store unit read issues" description="The total number of load/store read cycles." units="cycles" equation="(LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)" />
|
||||
<event name="MaliLSUtil" title="Load/store unit utilization" description="The percentage utilization of the load/store unit." units="percent" equation="((((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliLSWrCy" title="Load/store unit write issues" description="The total number of load/store write cycles." units="cycles" equation="(LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)" />
|
||||
<event name="MaliNonFragActiveCy" title="Non-fragment active cycles" description="The number of cycles when the shader core is processing some non-fragment workload." units="cycles" counter="COMPUTE_ACTIVE" offset="22" />
|
||||
<event name="MaliNonFragTask" title="Non-fragment core tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" counter="COMPUTE_TASKS" offset="23" />
|
||||
<event name="MaliNonFragThread" title="Non-fragment threads" description="The number of non-fragment threads started." units="threads" equation="(COMPUTE_WARPS) * (8)" />
|
||||
<event name="MaliNonFragThroughputCy" title="Average cycles per non-fragment thread" description="The average number of shader core cycles per non-fragment thread." units="cycles" equation="(COMPUTE_ACTIVE) / ((COMPUTE_WARPS) * (8))" />
|
||||
<event name="MaliNonFragUtil" title="Non-fragment utilization" description="The utilization of the shader core non-fragment path." units="percent" equation="((COMPUTE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragWarp" title="Non-fragment warps" description="The number of non-fragment warps created." units="warps" counter="COMPUTE_WARPS" offset="24" />
|
||||
<event name="MaliSCBusFFEExtRdBt" title="Fragment front-end read beats from external memory" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_FTC_EXT" offset="55" />
|
||||
<event name="MaliSCBusFFEExtRdBy" title="Fragment front-end read bytes from external memory" description="The total number of bytes read from the external memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC_EXT) * (16)" />
|
||||
<event name="MaliSCBusFFEL2RdBt" title="Fragment front-end read beats from L2 cache" description="The number of read beats received by the fixed-function fragment front-end." units="beats" counter="BEATS_RD_FTC" offset="54" />
|
||||
<event name="MaliSCBusFFEL2RdBy" title="Fragment front-end read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdBt" title="Load/store unit read beats from external memory" description="The number of read beats received by the load/store unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_LSC_EXT" offset="57" />
|
||||
<event name="MaliSCBusLSExtRdBy" title="Load/store unit read bytes from external memory" description="The total number of bytes read from the external memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC_EXT) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdByPerRd" title="Load/store unit bytes read from external memory per access cycle" description="The average number of bytes read from the external memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC_EXT) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSL2RdBt" title="Load/store unit read beats from L2 cache" description="The number of read beats received by the load/store unit." units="beats" counter="BEATS_RD_LSC" offset="56" />
|
||||
<event name="MaliSCBusLSL2RdBy" title="Load/store unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSL2RdByPerRd" title="Load/store unit bytes read from L2 per access cycle" description="The average number of bytes read from the L2 memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSOtherWrBt" title="Load/store unit other write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of any reason other than write-back." units="beats" counter="BEATS_WR_LSC_OTHER" offset="61" />
|
||||
<event name="MaliSCBusLSWBWrBt" title="Load/store unit write-back write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of write-back." units="beats" counter="BEATS_WR_LSC_WB" offset="63" />
|
||||
<event name="MaliSCBusLSWrBt" title="Load/store unit write beats to L2 memory system" description="The number of write beats sent by the load/store unit." units="beats" equation="(BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)" />
|
||||
<event name="MaliSCBusLSWrBy" title="Load/store unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the load/store unit." units="bytes" equation="((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)" />
|
||||
<event name="MaliSCBusLSWrByPerWr" title="Load/store unit bytes written to L2 per access cycle" description="The average number of bytes written to the L2 memory system by the load/store unit per write cycle." units="bytes" equation="(((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)) / ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT))" />
|
||||
<event name="MaliSCBusOtherL2RdBt" title="Miscellaneous read beats from L2 cache" description="The number of read beats received by a unit that is not specifically identified." units="beats" counter="BEATS_RD_OTHER" offset="60" />
|
||||
<event name="MaliSCBusTexExtRdBt" title="Texture unit read beats from external memory" description="The number of read beats received by the texture unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_TEX_EXT" offset="59" />
|
||||
<event name="MaliSCBusTexExtRdBy" title="Texture unit read bytes from external memory" description="The total number of bytes read from the external memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX_EXT) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdByPerRd" title="Texture unit bytes read from external memory per texture cycle" description="The average number of bytes read from the external memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX_EXT) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTexL2RdBt" title="Texture unit read beats from L2 cache" description="The number of read beats received by the texture unit." units="beats" counter="BEATS_RD_TEX" offset="58" />
|
||||
<event name="MaliSCBusTexL2RdBy" title="Texture unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX) * (16)" />
|
||||
<event name="MaliSCBusTexL2RdByPerRd" title="Texture unit bytes read from L2 per texture cycle" description="The average number of bytes read from the L2 memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTileWrBPerPx" title="Tile unit bytes written to L2 per pixel" description="The average number of bytes written to the L2 memory system by the tile unit per output pixel." units="bytes" equation="((BEATS_WR_TIB) * (16)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliSCBusTileWrBt" title="Tile unit write beats to L2 memory system" description="The number of write beats sent by the tile write-back unit." units="beats" counter="BEATS_WR_TIB" offset="62" />
|
||||
<event name="MaliSCBusTileWrBy" title="Tile unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the tile write-back unit." units="bytes" equation="(BEATS_WR_TIB) * (16)" />
|
||||
<event name="MaliTexCPI" title="Texture filtering cycles per instruction" description="The average number of texture filtering cycles per instruction." units="cycles" equation="((TEX_FILT_NUM_OPERATIONS)) / ((TEX_MSGI_NUM_QUADS) * 4)" />
|
||||
<event name="MaliTexCacheCompressFetch" title="Compressed texture line fetch requests" description="The number of texture line fetches from the L2 cache that are block compressed textures." units="issues" counter="TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED" offset="41" />
|
||||
<event name="MaliTexCacheCompressFetchRate" title="Texture data fetches from compressed lines" description="The percentage of texture line fetches that are from block compressed textures." units="percent" equation="((TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED) / (TEX_TFCH_NUM_LINES_FETCHED)) * 100" />
|
||||
<event name="MaliTexCacheFetch" title="Texture line fetch requests" description="The number of texture line fetches from the L2 cache." units="issues" counter="TEX_TFCH_NUM_LINES_FETCHED" offset="40" />
|
||||
<event name="MaliTexCacheLookup" title="Texture cache lookup requests" description="The number of texture cache lookup cycles." units="requests" counter="TEX_TFCH_NUM_OPERATIONS" offset="42" />
|
||||
<event name="MaliTexCacheUtil" title="Texture unit cache utilization" description="The percentage utilization of the texturing unit cache lookup path." units="percent" equation="((TEX_TFCH_NUM_OPERATIONS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexFiltIssueCy" title="Texture filtering cycles" description="The number of texture filtering issue cycles." units="cycles" counter="TEX_FILT_NUM_OPERATIONS" offset="43" />
|
||||
<event name="MaliTexIssueCy" title="Texture unit issue cycles" description="The number of cycles the texture unit was busy." units="cycles" equation="(TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliTexMipInstrRate" title="Texture accesses using mipmapping percentage" description="The percentage of texture operations accessing mipmapped textures." units="percent" equation="((TEX_DFCH_NUM_PASSES_MIP_MAP) / (TEX_DFCH_NUM_PASSES)) * 100" />
|
||||
<event name="MaliTexQuadPass" title="Texture quad issues" description="The number of quad-width filtering passes." units="issues" counter="TEX_DFCH_NUM_PASSES" offset="36" />
|
||||
<event name="MaliTexQuadPassDescMiss" title="Texture quad descriptor misses" description="The number of quad-width filtering passes that miss in the resource or sampler descriptor cache." units="requests" counter="TEX_DFCH_NUM_PASSES_MISS" offset="37" />
|
||||
<event name="MaliTexQuadPassMip" title="Mipmapped texture quad issues" description="The number of quad-width filtering passes that use a mipmapped texture." units="issues" counter="TEX_DFCH_NUM_PASSES_MIP_MAP" offset="38" />
|
||||
<event name="MaliTexQuadPassTri" title="Trilinear filtered texture quad issues" description="The number of quad-width filtering passes that use a trilinear filter." units="issues" counter="TEX_TIDX_NUM_SPLIT_MIP_MAP" offset="39" />
|
||||
<event name="MaliTexQuads" title="Texture quads" description="The number of quad-width texture operations processed by the texture unit." units="quads" counter="TEX_MSGI_NUM_QUADS" offset="35" />
|
||||
<event name="MaliTexSample" title="Texture samples" description="The number of texture samples made." units="requests" equation="(TEX_MSGI_NUM_QUADS) * 4" />
|
||||
<event name="MaliTexTriInstrRate" title="Texture accesses using trilinear filter percentage" description="The percentage of texture operations using trilinear filtering." units="percent" equation="((TEX_TIDX_NUM_SPLIT_MIP_MAP) / (TEX_MSGI_NUM_QUADS)) * 100" />
|
||||
<event name="MaliTexUtil" title="Texture unit utilization" description="The percentage utilization of the texturing unit." units="percent" equation="(((TEX_FILT_NUM_OPERATIONS)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliVar16IssueCy" title="16-bit interpolation issue cycles" description="The number of 16-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_16) / (1)" />
|
||||
<event name="MaliVar16IssueSlot" title="16-bit interpolation slots" description="The number of 16-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_16" offset="51" />
|
||||
<event name="MaliVar32IssueCy" title="32-bit interpolation issue cycles" description="The number of 32-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_32) / (1)" />
|
||||
<event name="MaliVar32IssueSlot" title="32-bit interpolation slots" description="The number of 32-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_32" offset="50" />
|
||||
<event name="MaliVarInstr" title="Varying unit instructions" description="The number of warp-width interpolation operations processed by the varying unit." units="requests" counter="VARY_INSTR" offset="49" />
|
||||
<event name="MaliVarIssueCy" title="Varying unit issue cycles" description="The total number of varying unit issue cycles." units="cycles" equation="((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))" />
|
||||
<event name="MaliVarUtil" title="Varying unit utilization" description="The percentage utilization of the varying unit." units="percent" equation="((((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
</category>
|
||||
<category name="Tiler">
|
||||
<event name="MaliGeomBackFacePrim" title="Visible back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" counter="BACK_FACING" offset="10" />
|
||||
<event name="MaliGeomFaceXYPlaneCullPrim" title="Facing or XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" counter="PRIM_CULLED" offset="12" />
|
||||
<event name="MaliGeomFaceXYPlaneCullRate" title="Facing or XY plane test cull percentage" description="The percentage of primitives culled by the facing or frustum XY plane tests." units="percent" equation="((PRIM_CULLED) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomFrontFacePrim" title="Visible front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" counter="FRONT_FACING" offset="9" />
|
||||
<event name="MaliGeomLinePrim" title="Line primitives" description="The number of input line primitives." units="primitives" counter="LINES" offset="7" />
|
||||
<event name="MaliGeomPointPrim" title="Point primitives" description="The number of input point primitives." units="primitives" counter="POINTS" offset="8" />
|
||||
<event name="MaliGeomPosShadTask" title="Tiler position shading requests" description="The number of position shading requests in the tiler geometry flow." units="requests" counter="IDVS_POS_SHAD_REQ" offset="21" />
|
||||
<event name="MaliGeomPosShadThread" title="Position shader thread invocations" description="The number of position shader thread invocations." units="threads" equation="(IDVS_POS_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomPosShadThreadPerPrim" title="Position threads per input primitive" description="The number of position shader invocations per input primitive." units="threads" equation="((IDVS_POS_SHAD_REQ) * (4)) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))" />
|
||||
<event name="MaliGeomSampleCullPrim" title="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" counter="PRIM_SAT_CULLED" offset="14" />
|
||||
<event name="MaliGeomSampleCullRate" title="Sample test cull percentage" description="The percentage of primitives culled by the sample coverage test." units="percent" equation="((PRIM_SAT_CULLED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED) - (PRIM_CLIPPED))) * 100" />
|
||||
<event name="MaliGeomTotalCullPrim" title="Culled primitives" description="The number of primitives that were culled during the rendering process." units="primitives" equation="(PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)" />
|
||||
<event name="MaliGeomTotalPrim" title="Total input primitives" description="The total number of input primitives to the rendering process." units="primitives" equation="((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomTrianglePrim" title="Triangle primitives" description="The number of input triangle primitives." units="primitives" counter="TRIANGLES" offset="6" />
|
||||
<event name="MaliGeomVarShadTask" title="Tiler varying shading requests" description="The number of varying shading requests in the tiler geometry flow." units="requests" counter="IDVS_VAR_SHAD_REQ" offset="37" />
|
||||
<event name="MaliGeomVarShadThread" title="Varying shader thread invocations" description="The number of varying shader thread invocations." units="threads" equation="(IDVS_VAR_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomVarShadThreadPerPrim" title="Varying threads per input primitive" description="The number of varying shader invocations per visible primitive." units="threads" equation="((IDVS_VAR_SHAD_REQ) * (4)) / (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomVisiblePrim" title="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" counter="PRIM_VISIBLE" offset="11" />
|
||||
<event name="MaliGeomVisibleRate" title="Visible primitive percentage" description="The percentage of primitives that are visible after culling." units="percent" equation="((PRIM_VISIBLE) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomZPlaneCullPrim" title="Z plane culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" counter="PRIM_CLIPPED" offset="13" />
|
||||
<event name="MaliGeomZPlaneCullRate" title="Z plane test cull percentage" description="The percentage of primitives culled by the frustum Z plane test." units="percent" equation="((PRIM_CLIPPED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED))) * 100" />
|
||||
<event name="MaliTilerActiveCy" title="Tiler active cycles" description="The number of cycles when the tiler has a workload queued for processing." units="cycles" counter="TILER_ACTIVE" offset="4" />
|
||||
<event name="MaliTilerPosCacheHit" title="Position cache hit requests" description="The number of position lookups that result in a hit in the vertex cache." units="requests" counter="VCACHE_HIT" offset="26" />
|
||||
<event name="MaliTilerPosCacheHitRate" title="Position cache hit percentage" description="The percentage hit rate of the tiler position cache." units="percent" equation="((VCACHE_HIT) / ((VCACHE_HIT) + (VCACHE_MISS))) * 100" />
|
||||
<event name="MaliTilerPosCacheMiss" title="Position cache miss requests" description="The number of position lookups that miss in the vertex cache." units="requests" counter="VCACHE_MISS" offset="27" />
|
||||
<event name="MaliTilerPosShadFIFOFullCy" title="Tiler position FIFO full cycles" description="The number of cycles when the tiler has a stalled position shading buffer." units="cycles" counter="IDVS_POS_FIFO_FULL" offset="24" />
|
||||
<event name="MaliTilerPosShadStallCy" title="Tiler position shading stall cycles" description="The number of cycles when the tiler has a stalled position shading request." units="cycles" counter="IDVS_POS_SHAD_STALL" offset="23" />
|
||||
<event name="MaliTilerRdBt" title="Output internal read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" counter="BUS_READ" offset="17" />
|
||||
<event name="MaliTilerUtil" title="Tiler utilization" description="The percentage of GPU active cycles when the tiler has a workload queued for processing." units="percent" equation="((TILER_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliTilerVarCacheHit" title="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" counter="IDVS_VBU_HIT" offset="34" />
|
||||
<event name="MaliTilerVarCacheHitRate" title="Varying cache hit percentage" description="The percentage hit rate of the tiler varying cache." units="percent" equation="((IDVS_VBU_HIT) / ((IDVS_VBU_HIT) + (IDVS_VBU_MISS))) * 100" />
|
||||
<event name="MaliTilerVarCacheMiss" title="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" counter="IDVS_VBU_MISS" offset="35" />
|
||||
<event name="MaliTilerVarShadStallCy" title="Tiler varying shading stall cycles" description="The number of cycles when the tiler has a stalled varying shading request." units="cycles" counter="IDVS_VAR_SHAD_STALL" offset="38" />
|
||||
<event name="MaliTilerWrBt" title="Internal write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" counter="BUS_WRITE" offset="19" />
|
||||
</category>
|
||||
</metrics>
|
||||
259
src/panfrost/perf/generated/G77.xml
Normal file
259
src/panfrost/perf/generated/G77.xml
Normal file
|
|
@ -0,0 +1,259 @@
|
|||
<!--
|
||||
Copyright (c) 2026 Arm, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
Generated from libGPUCounters @ 4ea7c0127fe2942a00e4a1123bb62c625a401f93.
|
||||
https://github.com/ARM-software/libGPUCounters
|
||||
which is:
|
||||
Copyright (c) 2023-2025 Arm Limited
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
|
||||
<metrics id="G77">
|
||||
<category name="GPU Front-end">
|
||||
<event name="MaliFragQueueActiveCy" title="Fragment queue active cycles" description="The number of cycles when work is queued for processing in the GPU fragment queue." units="cycles" counter="JS0_ACTIVE" offset="10" />
|
||||
<event name="MaliFragQueueJob" title="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" counter="JS0_JOBS" offset="8" />
|
||||
<event name="MaliFragQueueTask" title="Fragment tasks" description="The number of fragment tasks processed." units="tasks" counter="JS0_TASKS" offset="9" />
|
||||
<event name="MaliFragQueueUtil" title="Fragment queue utilization" description="The fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS0_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragQueueWaitDepCy" title="Fragment queue job dependency wait cycles" description="The number of cycles when queued fragment work is waiting for dependent work to complete." units="cycles" counter="JS0_WAIT_DEPEND" offset="14" />
|
||||
<event name="MaliFragQueueWaitFinishCy" title="Fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued fragment work to complete." units="cycles" counter="JS0_WAIT_FINISH" offset="15" />
|
||||
<event name="MaliFragQueueWaitFlushCy" title="Fragment queue cache flush wait cycles" description="The number of cycles when queued fragment work is waiting for a cache flush." units="cycles" counter="JS0_WAIT_FLUSH" offset="11" />
|
||||
<event name="MaliFragQueueWaitIssueCy" title="Fragment queue job issue wait cycles" description="The number of cycles when queued fragment work is waiting for an available processor." units="cycles" counter="JS0_WAIT_ISSUE" offset="13" />
|
||||
<event name="MaliFragQueueWaitRdCy" title="Fragment queue job descriptor read wait cycles" description="The number of cycles when queued fragment work is waiting for a descriptor load." units="cycles" counter="JS0_WAIT_READ" offset="12" />
|
||||
<event name="MaliGPUActiveCy" title="GPU active cycles" description="The number of cycles when the GPU has a workload of any type queued for processing." units="cycles" counter="GPU_ACTIVE" offset="6" />
|
||||
<event name="MaliGPUIRQActiveCy" title="GPU interrupt pending cycles" description="The number of cycles when the GPU has a pending interrupt." units="cycles" counter="IRQ_ACTIVE" offset="7" />
|
||||
<event name="MaliGPUIRQUtil" title="Interrupt pending utilization" description="The IRQ pending utilization compared against the GPU active cycles." units="percent" equation="((IRQ_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueActiveCy" title="Non-fragment queue active cycles" description="The number of cycles when work is queued in the GPU non-fragment queue." units="cycles" counter="JS1_ACTIVE" offset="18" />
|
||||
<event name="MaliNonFragQueueJob" title="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" counter="JS1_JOBS" offset="16" />
|
||||
<event name="MaliNonFragQueueTask" title="Non-fragment tasks" description="The number of non-fragment tasks processed." units="tasks" counter="JS1_TASKS" offset="17" />
|
||||
<event name="MaliNonFragQueueUtil" title="Non-fragment queue utilization" description="The non-fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS1_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueWaitDepCy" title="Non-fragment queue job dependency wait cycles" description="The number of cycles when queued non-fragment work is waiting for dependent work to complete." units="cycles" counter="JS1_WAIT_DEPEND" offset="22" />
|
||||
<event name="MaliNonFragQueueWaitFinishCy" title="Non-fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued non-fragment work to complete." units="cycles" counter="JS1_WAIT_FINISH" offset="23" />
|
||||
<event name="MaliNonFragQueueWaitFlushCy" title="Non-fragment queue cache flush wait cycles" description="The number of cycles when queued non-fragment work is waiting for a cache flush." units="cycles" counter="JS1_WAIT_FLUSH" offset="19" />
|
||||
<event name="MaliNonFragQueueWaitIssueCy" title="Non-fragment queue job issue wait cycles" description="The number of cycles when queued non-fragment work is waiting for an available processor." units="cycles" counter="JS1_WAIT_ISSUE" offset="21" />
|
||||
<event name="MaliNonFragQueueWaitRdCy" title="Non-fragment queue job descriptor read wait cycles" description="The number of cycles when queued non-fragment work is waiting for a descriptor load." units="cycles" counter="JS1_WAIT_READ" offset="20" />
|
||||
<event name="MaliResQueueActiveCy" title="Reserved active cycles" description="The number of cycles when work is queued in the GPU reserved queue." units="cycles" counter="JS2_ACTIVE" offset="26" />
|
||||
<event name="MaliResQueueJob" title="Reserved queue jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" counter="JS2_JOBS" offset="24" />
|
||||
<event name="MaliResQueueTask" title="Reserved queue tasks" description="The number of reserved tasks processed." units="tasks" counter="JS2_TASKS" offset="25" />
|
||||
<event name="MaliResQueueWaitDepCy" title="Reserved queue job dependency wait cycles" description="The number of cycles when queued reserved work is waiting for dependent work to complete." units="cycles" counter="JS2_WAIT_DEPEND" offset="30" />
|
||||
<event name="MaliResQueueWaitFinishCy" title="Reserved queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued reserved work to complete." units="cycles" counter="JS2_WAIT_FINISH" offset="31" />
|
||||
<event name="MaliResQueueWaitFlushCy" title="Reserved queue cache flush wait cycles" description="The number of cycles when queued reserved work is waiting for a cache flush." units="cycles" counter="JS2_WAIT_FLUSH" offset="27" />
|
||||
<event name="MaliResQueueWaitIssueCy" title="Reserved queue job issue wait cycles" description="The number of cycles when queued reserved work is waiting for an available processor." units="cycles" counter="JS2_WAIT_ISSUE" offset="29" />
|
||||
<event name="MaliResQueueWaitRdCy" title="Reserved queue job descriptor read wait cycles" description="The number of cycles when queued reserved work is waiting for a descriptor load." units="cycles" counter="JS2_WAIT_READ" offset="28" />
|
||||
</category>
|
||||
<category name="Memory System">
|
||||
<event name="MaliExtBusRd" title="Output external read transactions" description="The number of external read transactions." units="transactions" counter="L2_EXT_READ" offset="29" />
|
||||
<event name="MaliExtBusRdBt" title="Output external read beats" description="The number of external bus data read cycles." units="beats" counter="L2_EXT_READ_BEATS" offset="32" />
|
||||
<event name="MaliExtBusRdBy" title="Output external read bytes" description="The total output read bandwidth for the GPU." units="bytes" equation="(L2_EXT_READ_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusRdLat0" title="Output external read latency 0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" counter="L2_EXT_RRESP_0_127" offset="37" />
|
||||
<event name="MaliExtBusRdLat128" title="Output external read latency 128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" counter="L2_EXT_RRESP_128_191" offset="38" />
|
||||
<event name="MaliExtBusRdLat192" title="Output external read latency 192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" counter="L2_EXT_RRESP_192_255" offset="39" />
|
||||
<event name="MaliExtBusRdLat256" title="Output external read latency 256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" counter="L2_EXT_RRESP_256_319" offset="40" />
|
||||
<event name="MaliExtBusRdLat320" title="Output external read latency 320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" counter="L2_EXT_RRESP_320_383" offset="41" />
|
||||
<event name="MaliExtBusRdLat384" title="Output external read latency 384+ cycles" description="The number of read beats that are returned at least 384 cycles after the transaction started." units="beats" equation="(L2_EXT_READ_BEATS) - (L2_EXT_RRESP_0_127) - (L2_EXT_RRESP_128_191) - (L2_EXT_RRESP_192_255) - (L2_EXT_RRESP_256_319) - (L2_EXT_RRESP_320_383)" />
|
||||
<event name="MaliExtBusRdNoSnoop" title="Output external ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" counter="L2_EXT_READ_NOSNP" offset="30" />
|
||||
<event name="MaliExtBusRdOTQ1" title="Output external outstanding reads 0-25%" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q1" offset="34" />
|
||||
<event name="MaliExtBusRdOTQ2" title="Output external outstanding reads 25-50%" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q2" offset="35" />
|
||||
<event name="MaliExtBusRdOTQ3" title="Output external outstanding reads 50-75%" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q3" offset="36" />
|
||||
<event name="MaliExtBusRdOTQ4" title="Output external outstanding reads 75-100%" description="The number of read transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_READ) - (L2_EXT_AR_CNT_Q1) - (L2_EXT_AR_CNT_Q2) - (L2_EXT_AR_CNT_Q3)" />
|
||||
<event name="MaliExtBusRdStallCy" title="Output external read stall cycles" description="The number of cycles when a read is stalled waiting for the external bus." units="cycles" counter="L2_EXT_AR_STALL" offset="33" />
|
||||
<event name="MaliExtBusRdStallRate" title="Output external read stall percentage" description="The percentage of cycles with an external read transaction stalled." units="percent" equation="((L2_EXT_AR_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliExtBusRdUnique" title="Output external ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" counter="L2_EXT_READ_UNIQUE" offset="31" />
|
||||
<event name="MaliExtBusWr" title="Output external write transactions" description="The number of external write transactions." units="transactions" counter="L2_EXT_WRITE" offset="42" />
|
||||
<event name="MaliExtBusWrBt" title="Output external write beats" description="The number of external bus data write cycles." units="beats" counter="L2_EXT_WRITE_BEATS" offset="47" />
|
||||
<event name="MaliExtBusWrBy" title="Output external write bytes" description="The total output write bandwidth for the GPU." units="bytes" equation="(L2_EXT_WRITE_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusWrNoSnoopFull" title="Output external WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_FULL" offset="43" />
|
||||
<event name="MaliExtBusWrNoSnoopPart" title="Output external WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_PTL" offset="44" />
|
||||
<event name="MaliExtBusWrOTQ1" title="Output external outstanding writes 0-25%" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q1" offset="49" />
|
||||
<event name="MaliExtBusWrOTQ2" title="Output external outstanding writes 25-50%" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q2" offset="50" />
|
||||
<event name="MaliExtBusWrOTQ3" title="Output external outstanding writes 50-75%" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q3" offset="51" />
|
||||
<event name="MaliExtBusWrOTQ4" title="Output external outstanding writes 75-100%" description="The number of write transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_WRITE) - (L2_EXT_AW_CNT_Q1) - (L2_EXT_AW_CNT_Q2) - (L2_EXT_AW_CNT_Q3)" />
|
||||
<event name="MaliExtBusWrSnoopFull" title="Output external WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_FULL" offset="45" />
|
||||
<event name="MaliExtBusWrSnoopPart" title="Output external WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_PTL" offset="46" />
|
||||
<event name="MaliExtBusWrStallCy" title="Output external write stall cycles" description="The number of cycles when a write is stalled waiting for the external bus." units="cycles" counter="L2_EXT_W_STALL" offset="48" />
|
||||
<event name="MaliExtBusWrStallRate" title="Output external write stall percentage" description="The percentage of cycles with an external write transaction stalled." units="percent" equation="((L2_EXT_W_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliL2CacheFlush" title="L2 cache flush requests" description="The number of GPU L2 cache flushes performed." units="requests" counter="CACHE_FLUSH" offset="63" />
|
||||
<event name="MaliL2CacheIncSnp" title="Input external snoop transactions" description="The number of coherency snoops triggered by external requesters." units="transactions" counter="L2_EXT_SNOOP" offset="52" />
|
||||
<event name="MaliL2CacheIncSnpStallCy" title="Input external snoop stall cycles" description="The number of cycles when a coherency snoop triggered by an external requester is stalled." units="cycles" counter="L2_EXT_SNOOP_STALL" offset="53" />
|
||||
<event name="MaliL2CacheL1Rd" title="Output internal read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal requester." units="requests" counter="L2_RD_MSG_OUT" offset="22" />
|
||||
<event name="MaliL2CacheL1RdStallCy" title="Output internal read stall cycles" description="The number of cycles when L1 cache read requests sent by the L2 cache to an internal requester are stalled." units="cycles" counter="L2_RD_MSG_OUT_STALL" offset="23" />
|
||||
<event name="MaliL2CacheL1Wr" title="Output internal write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal requester." units="requests" counter="L2_WR_MSG_OUT" offset="24" />
|
||||
<event name="MaliL2CacheLookup" title="Any lookup requests" description="The number of L2 cache lookups performed." units="requests" counter="L2_ANY_LOOKUP" offset="25" />
|
||||
<event name="MaliL2CacheRd" title="Input internal read requests" description="The number of L2 cache read requests from internal requesters." units="requests" counter="L2_RD_MSG_IN" offset="16" />
|
||||
<event name="MaliL2CacheRdLookup" title="Read lookup requests" description="The number of L2 cache read lookups performed." units="requests" counter="L2_READ_LOOKUP" offset="26" />
|
||||
<event name="MaliL2CacheRdMissRate" title="L2 cache read miss percentage" description="The percentage of internal L2 cache reads that result in an external read." units="percent" equation="((L2_EXT_READ) / (L2_READ_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheRdStallCy" title="Input internal read stall cycles" description="The number of cycles when L2 cache read requests from internal requesters are stalled." units="cycles" counter="L2_RD_MSG_IN_STALL" offset="17" />
|
||||
<event name="MaliL2CacheSnp" title="Input internal snoop requests" description="The number of L2 snoop requests from internal requesters." units="requests" counter="L2_SNP_MSG_IN" offset="20" />
|
||||
<event name="MaliL2CacheSnpLookup" title="Input external snoop lookup requests" description="The number of coherency snoop lookups performed that were triggered by an external requester." units="requests" counter="L2_EXT_SNOOP_LOOKUP" offset="28" />
|
||||
<event name="MaliL2CacheSnpStallCy" title="Input internal snoop stall cycles" description="The number of cycles when L2 cache snoop requests from internal requesters are stalled." units="cycles" counter="L2_SNP_MSG_IN_STALL" offset="21" />
|
||||
<event name="MaliL2CacheWr" title="Input internal write requests" description="The number of L2 cache write requests from internal requesters." units="requests" counter="L2_WR_MSG_IN" offset="18" />
|
||||
<event name="MaliL2CacheWrLookup" title="Write lookup requests" description="The number of L2 cache write lookups performed." units="requests" counter="L2_WRITE_LOOKUP" offset="27" />
|
||||
<event name="MaliL2CacheWrMissRate" title="L2 cache write miss percentage" description="The percentage of internal L2 cache writes that result in an external write." units="percent" equation="((L2_EXT_WRITE) / (L2_WRITE_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheWrStallCy" title="Input internal write stall cycles" description="The number of cycles when L2 cache write requests from internal requesters are stalled." units="cycles" counter="L2_WR_MSG_IN_STALL" offset="19" />
|
||||
<event name="MaliMMUL2Hit" title="MMU L2 lookup TLB hits" description="The number of level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L2" offset="8" />
|
||||
<event name="MaliMMUL2Rd" title="MMU L2 table read requests" description="The number of level 2 translation table reads." units="requests" counter="MMU_TABLE_READS_L2" offset="6" />
|
||||
<event name="MaliMMUL3Hit" title="MMU L3 lookup TLB hits" description="The number of level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L3" offset="7" />
|
||||
<event name="MaliMMUL3Rd" title="MMU L3 table read requests" description="The number of level 3 translation table reads." units="requests" counter="MMU_TABLE_READS_L3" offset="5" />
|
||||
<event name="MaliMMULookup" title="MMU lookup requests" description="The number of main MMU address translations performed." units="requests" counter="MMU_REQUESTS" offset="4" />
|
||||
<event name="MaliMMUS2L2Hit" title="MMU stage 2 L2 lookup TLB hits" description="The number of stage 2 level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L2" offset="13" />
|
||||
<event name="MaliMMUS2L2Rd" title="MMU stage 2 L2 lookup requests" description="The number of stage 2 level 2 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L2" offset="11" />
|
||||
<event name="MaliMMUS2L3Hit" title="MMU stage 2 L3 lookup TLB hits" description="The number of stage 2 level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L3" offset="12" />
|
||||
<event name="MaliMMUS2L3Rd" title="MMU stage 2 L3 lookup requests" description="The number of stage 2 level 3 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L3" offset="10" />
|
||||
<event name="MaliMMUS2Lookup" title="MMU stage 2 lookup requests" description="The number of main MMU stage 2 address translations performed." units="requests" counter="MMU_S2_REQUESTS" offset="9" />
|
||||
</category>
|
||||
<category name="Shader Core">
|
||||
<event name="MaliALUIssueCy" title="Arithmetic unit issue cycles" description="The number of cycles the arithmetic unit was busy." units="cycles" equation="max((EXEC_INSTR_FMA), (EXEC_INSTR_CVT), (EXEC_INSTR_SFU) * 4)" />
|
||||
<event name="MaliALUUtil" title="Arithmetic unit utilization" description="The percentage utilization of the arithmetic unit." units="percent" equation="((max((EXEC_INSTR_FMA), (EXEC_INSTR_CVT), (EXEC_INSTR_SFU) * 4)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliAttrInstr" title="Attribute instructions" description="The number of instructions run by the attribute unit." units="instructions" counter="ATTR_INSTR" offset="52" />
|
||||
<event name="MaliCoreActiveCy" title="Execution core active cycles" description="The number of cycles when the shader core is processing at least one warp." units="cycles" counter="EXEC_CORE_ACTIVE" offset="26" />
|
||||
<event name="MaliCoreAllRegsWarp" title="Warps using more than 32 registers" description="The number of warps that require more than 32 registers." units="warps" counter="WARP_REG_SIZE_64" offset="17" />
|
||||
<event name="MaliCoreAllRegsWarpRate" title="All registers warp percentage" description="The percentage of warps that require more than 32 registers." units="percent" equation="((WARP_REG_SIZE_64) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreFullWarp" title="Full warps" description="The number of warps that have a full thread slot allocation." units="warps" counter="FULL_QUAD_WARPS" offset="21" />
|
||||
<event name="MaliCoreFullWarpRate" title="Full warp percentage" description="The percentage of warps that have a full thread slot allocation." units="percent" equation="((FULL_QUAD_WARPS) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreUtil" title="Execution core utilization" description="The utilization of the programmable shader core." units="percent" equation="((EXEC_CORE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliEngArithInstr" title="Arithmetic instruction issue cycles" description="The total number of instructions issued to the FMA, CVT, and SFU pipes." units="instructions" equation="(EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU)" />
|
||||
<event name="MaliEngCVTInstr" title="Arithmetic CVT pipe instructions" description="The number of instructions issued to the CVT pipe." units="instructions" counter="EXEC_INSTR_CVT" offset="28" />
|
||||
<event name="MaliEngCVTPipeUtil" title="CVT pipe utilization" description="The utilization of the CVT pipe." units="percent" equation="((EXEC_INSTR_CVT) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngDivergedInstr" title="Diverged instructions" description="The number of instructions run per warp that have control flow divergence." units="instructions" counter="EXEC_INSTR_DIVERGED" offset="31" />
|
||||
<event name="MaliEngDivergedInstrRate" title="Warp divergence percentage" description="The percentage of instructions that have control flow divergence across the warp." units="percent" equation="((EXEC_INSTR_DIVERGED) / ((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU))) * 100" />
|
||||
<event name="MaliEngFMAInstr" title="Arithmetic FMA pipe instructions" description="The number of instructions issued to the FMA pipe." units="instructions" counter="EXEC_INSTR_FMA" offset="27" />
|
||||
<event name="MaliEngFMAPipeUtil" title="FMA pipe utilization" description="The utilization of the FMA pipe." units="percent" equation="((EXEC_INSTR_FMA) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngICacheMiss" title="Instruction cache misses" description="The number of instruction cache misses." units="requests" counter="EXEC_ICACHE_MISS" offset="32" />
|
||||
<event name="MaliEngSFUInstr" title="Arithmetic SFU pipe instructions" description="The number of instructions issued to the SFU pipe." units="instructions" counter="EXEC_INSTR_SFU" offset="29" />
|
||||
<event name="MaliEngSFUPipeUtil" title="SFU pipe utilization" description="The utilization of the SFU pipe." units="percent" equation="(((EXEC_INSTR_SFU) * 4) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngSWBlendInstr" title="Blend shader instructions" description="The number of blend shader invocations run." units="instructions" counter="CALL_BLEND_SHADER" offset="34" />
|
||||
<event name="MaliEngSWBlendRate" title="Shader blend percentage" description="The percentage of fragments that use shader-based blending." units="percent" equation="(((CALL_BLEND_SHADER) * 2) / (FRAG_WARPS)) * 100" />
|
||||
<event name="MaliEngStarveCy" title="Processing unit starvation cycles" description="The number of cycles when the processing unit is starved of work." units="cycles" counter="EXEC_STARVE_ARITH" offset="33" />
|
||||
<event name="MaliFragActiveCy" title="Fragment active cycles" description="The number of cycles when the shader core is processing a fragment workload." units="cycles" counter="FRAG_ACTIVE" offset="4" />
|
||||
<event name="MaliFragEZSKillQd" title="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_KILL" offset="14" />
|
||||
<event name="MaliFragEZSKillRate" title="Early ZS killed quad percentage" description="The percentage of rasterized quads that are killed by early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSTestQd" title="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_TEST" offset="12" />
|
||||
<event name="MaliFragEZSTestRate" title="Early ZS tested quad percentage" description="The percentage of rasterized quads that were subjected to early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSUpdateQd" title="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" counter="FRAG_QUADS_EZS_UPDATE" offset="13" />
|
||||
<event name="MaliFragEZSUpdateRate" title="Early ZS updated quad percentage" description="The percentage of rasterized quads that update the framebuffer during early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_UPDATE) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragFPKActiveCy" title="Fragment pre-pipe buffer active cycles" description="The number of cycles when at least one quad is present in the pre-pipe quad queue." units="cycles" counter="FRAG_FPK_ACTIVE" offset="7" />
|
||||
<event name="MaliFragFPKBUtil" title="Fragment pre-pipe buffer utilization" description="The percentage of cycles when at least one quad is buffered for fragment shading." units="percent" equation="((FRAG_FPK_ACTIVE) / (FRAG_ACTIVE)) * 100" />
|
||||
<event name="MaliFragFPKKillQd" title="FPK HSR killed quads" description="The number of quads that are killed by hidden surface removal." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (((FRAG_WARPS) * (16)) / 4)" />
|
||||
<event name="MaliFragFPKKillRate" title="FPK HSR killed quad percentage" description="The percentage of rasterized quads that are killed by hidden surface removal." units="percent" equation="(((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (((FRAG_WARPS) * (16)) / 4)) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSKillQd" title="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" counter="FRAG_LZS_KILL" offset="16" />
|
||||
<event name="MaliFragLZSKillRate" title="Late ZS killed quad percentage" description="The percentage of rasterized quads that are killed by late depth and stencil testing." units="percent" equation="((FRAG_LZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSTestQd" title="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" counter="FRAG_LZS_TEST" offset="15" />
|
||||
<event name="MaliFragLZSTestRate" title="Late ZS tested quad percentage" description="The percentage of rasterized quads that are tested by late depth and stencil testing." units="percent" equation="((FRAG_LZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragOpaqueQd" title="Occluding quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" counter="QUAD_FPK_KILLER" offset="20" />
|
||||
<event name="MaliFragOpaqueQdRate" title="Occluding quad percentage" description="The percentage of quads that are valid occluders for hidden surface removal." units="percent" equation="((QUAD_FPK_KILLER) / ((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL))) * 100" />
|
||||
<event name="MaliFragOverdraw" title="Fragments per pixel" description="The number of fragments shaded per output pixel." units="threads" equation="((FRAG_WARPS) * (16)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliFragRastPartQd" title="Partial rasterized fine quads" description="The number of rasterized fine quads created with partial coverage." units="quads" counter="FRAG_PARTIAL_QUADS_RAST" offset="10" />
|
||||
<event name="MaliFragRastPartQdRate" title="Partial coverage percentage" description="The percentage of rasterized fine quads that contain samples with no coverage." units="percent" equation="((FRAG_PARTIAL_QUADS_RAST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragRastPrim" title="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" counter="FRAG_PRIM_RAST" offset="6" />
|
||||
<event name="MaliFragRastQd" title="Rasterized fine quads" description="The number of fine quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_RAST" offset="11" />
|
||||
<event name="MaliFragRdPrim" title="Fragment primitives loaded" description="The number of primitives loaded from the tile list by the fragment front-end." units="primitives" counter="FRAG_PRIMITIVES_OUT" offset="5" />
|
||||
<event name="MaliFragShadedQd" title="Shaded coarse quads" description="The number of 2x2 fragment quads that are fragment shaded." units="quads" equation="((FRAG_WARPS) * (16)) / 4" />
|
||||
<event name="MaliFragThread" title="Fragment threads" description="The number of fragment threads started." units="threads" equation="(FRAG_WARPS) * (16)" />
|
||||
<event name="MaliFragThroughputCy" title="Average cycles per fragment thread" description="The average number of shader core cycles per fragment thread." units="cycles" equation="(FRAG_ACTIVE) / ((FRAG_WARPS) * (16))" />
|
||||
<event name="MaliFragTile" title="Tiles" description="The number of tiles processed by the shader core." units="tiles" counter="FRAG_PTILES" offset="18" />
|
||||
<event name="MaliFragTileKill" title="Killed unchanged tiles" description="The number of tiles killed by transaction elimination." units="tiles" counter="FRAG_TRANS_ELIM" offset="19" />
|
||||
<event name="MaliFragTileKillRate" title="Unchanged tile kill percentage" description="The percentage of tiles that are killed by transaction elimination." units="percent" equation="((FRAG_TRANS_ELIM) / (FRAG_PTILES)) * 100" />
|
||||
<event name="MaliFragTransparentQd" title="Non-occluding quads" description="The number of quads that are not eligible to be a hidden surface removal occluder." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (QUAD_FPK_KILLER)" />
|
||||
<event name="MaliFragUtil" title="Fragment utilization" description="The utilization of the shader core fragment path." units="percent" equation="((FRAG_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragWarp" title="Fragment warps" description="The number of fragment warps created." units="warps" counter="FRAG_WARPS" offset="9" />
|
||||
<event name="MaliLSAtomic" title="Load/store unit atomic issues" description="The number of load/store atomic accesses." units="cycles" counter="LS_MEM_ATOMIC" offset="48" />
|
||||
<event name="MaliLSFullRd" title="Load/store unit full read issues" description="The number of full-width load/store cache reads." units="cycles" counter="LS_MEM_READ_FULL" offset="44" />
|
||||
<event name="MaliLSFullWr" title="Load/store unit full write issues" description="The number of full-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_FULL" offset="46" />
|
||||
<event name="MaliLSIssueCy" title="Load/store unit issue cycles" description="The total number of load/store issue cycles." units="cycles" equation="((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)" />
|
||||
<event name="MaliLSPartRd" title="Load/store unit partial read issues" description="The number of partial-width load/store cache reads." units="cycles" counter="LS_MEM_READ_SHORT" offset="45" />
|
||||
<event name="MaliLSPartWr" title="Load/store unit partial write issues" description="The number of partial-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_SHORT" offset="47" />
|
||||
<event name="MaliLSRdCy" title="Load/store unit read issues" description="The total number of load/store read cycles." units="cycles" equation="(LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)" />
|
||||
<event name="MaliLSUtil" title="Load/store unit utilization" description="The percentage utilization of the load/store unit." units="percent" equation="((((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliLSWrCy" title="Load/store unit write issues" description="The total number of load/store write cycles." units="cycles" equation="(LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)" />
|
||||
<event name="MaliNonFragActiveCy" title="Non-fragment active cycles" description="The number of cycles when the shader core is processing some non-fragment workload." units="cycles" counter="COMPUTE_ACTIVE" offset="22" />
|
||||
<event name="MaliNonFragTask" title="Non-fragment core tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" counter="COMPUTE_TASKS" offset="23" />
|
||||
<event name="MaliNonFragThread" title="Non-fragment threads" description="The number of non-fragment threads started." units="threads" equation="(COMPUTE_WARPS) * (16)" />
|
||||
<event name="MaliNonFragThroughputCy" title="Average cycles per non-fragment thread" description="The average number of shader core cycles per non-fragment thread." units="cycles" equation="(COMPUTE_ACTIVE) / ((COMPUTE_WARPS) * (16))" />
|
||||
<event name="MaliNonFragUtil" title="Non-fragment utilization" description="The utilization of the shader core non-fragment path." units="percent" equation="((COMPUTE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragWarp" title="Non-fragment warps" description="The number of non-fragment warps created." units="warps" counter="COMPUTE_WARPS" offset="24" />
|
||||
<event name="MaliSCBusFFEExtRdBt" title="Fragment front-end read beats from external memory" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_FTC_EXT" offset="55" />
|
||||
<event name="MaliSCBusFFEExtRdBy" title="Fragment front-end read bytes from external memory" description="The total number of bytes read from the external memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC_EXT) * (16)" />
|
||||
<event name="MaliSCBusFFEL2RdBt" title="Fragment front-end read beats from L2 cache" description="The number of read beats received by the fixed-function fragment front-end." units="beats" counter="BEATS_RD_FTC" offset="54" />
|
||||
<event name="MaliSCBusFFEL2RdBy" title="Fragment front-end read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdBt" title="Load/store unit read beats from external memory" description="The number of read beats received by the load/store unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_LSC_EXT" offset="57" />
|
||||
<event name="MaliSCBusLSExtRdBy" title="Load/store unit read bytes from external memory" description="The total number of bytes read from the external memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC_EXT) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdByPerRd" title="Load/store unit bytes read from external memory per access cycle" description="The average number of bytes read from the external memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC_EXT) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSL2RdBt" title="Load/store unit read beats from L2 cache" description="The number of read beats received by the load/store unit." units="beats" counter="BEATS_RD_LSC" offset="56" />
|
||||
<event name="MaliSCBusLSL2RdBy" title="Load/store unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSL2RdByPerRd" title="Load/store unit bytes read from L2 per access cycle" description="The average number of bytes read from the L2 memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSOtherWrBt" title="Load/store unit other write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of any reason other than write-back." units="beats" counter="BEATS_WR_LSC_OTHER" offset="61" />
|
||||
<event name="MaliSCBusLSWBWrBt" title="Load/store unit write-back write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of write-back." units="beats" counter="BEATS_WR_LSC_WB" offset="63" />
|
||||
<event name="MaliSCBusLSWrBt" title="Load/store unit write beats to L2 memory system" description="The number of write beats sent by the load/store unit." units="beats" equation="(BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)" />
|
||||
<event name="MaliSCBusLSWrBy" title="Load/store unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the load/store unit." units="bytes" equation="((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)" />
|
||||
<event name="MaliSCBusLSWrByPerWr" title="Load/store unit bytes written to L2 per access cycle" description="The average number of bytes written to the L2 memory system by the load/store unit per write cycle." units="bytes" equation="(((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)) / ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT))" />
|
||||
<event name="MaliSCBusOtherL2RdBt" title="Miscellaneous read beats from L2 cache" description="The number of read beats received by a unit that is not specifically identified." units="beats" counter="BEATS_RD_OTHER" offset="60" />
|
||||
<event name="MaliSCBusTexExtRdBt" title="Texture unit read beats from external memory" description="The number of read beats received by the texture unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_TEX_EXT" offset="59" />
|
||||
<event name="MaliSCBusTexExtRdBy" title="Texture unit read bytes from external memory" description="The total number of bytes read from the external memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX_EXT) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdByPerRd" title="Texture unit bytes read from external memory per texture cycle" description="The average number of bytes read from the external memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX_EXT) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTexL2RdBt" title="Texture unit read beats from L2 cache" description="The number of read beats received by the texture unit." units="beats" counter="BEATS_RD_TEX" offset="58" />
|
||||
<event name="MaliSCBusTexL2RdBy" title="Texture unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX) * (16)" />
|
||||
<event name="MaliSCBusTexL2RdByPerRd" title="Texture unit bytes read from L2 per texture cycle" description="The average number of bytes read from the L2 memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTileWrBPerPx" title="Tile unit bytes written to L2 per pixel" description="The average number of bytes written to the L2 memory system by the tile unit per output pixel." units="bytes" equation="((BEATS_WR_TIB) * (16)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliSCBusTileWrBt" title="Tile unit write beats to L2 memory system" description="The number of write beats sent by the tile write-back unit." units="beats" counter="BEATS_WR_TIB" offset="62" />
|
||||
<event name="MaliSCBusTileWrBy" title="Tile unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the tile write-back unit." units="bytes" equation="(BEATS_WR_TIB) * (16)" />
|
||||
<event name="MaliTexCPI" title="Texture filtering cycles per instruction" description="The average number of texture filtering cycles per instruction." units="cycles" equation="((TEX_FILT_NUM_OPERATIONS)) / (((TEX_MSGO_NUM_MSG)) * 4)" />
|
||||
<event name="MaliTexDataFetchStallCy" title="Texture fetch stall cycles" description="The number of cycles when a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles" counter="TEX_TFCH_CLK_STALLED" offset="37" />
|
||||
<event name="MaliTexDescStallCy" title="Texture descriptor stall cycles" description="The number of cycles when a quad is stalled on texture descriptor fetch." units="cycles" counter="TEX_DFCH_CLK_STALLED" offset="36" />
|
||||
<event name="MaliTexFiltFullRate" title="Texture full speed filtering percentage" description="The percentage of texture filtering cycles using the full width of the texture filtering data path." units="percent" equation="(((TEX_FILT_NUM_FXR_OPERATIONS) + (TEX_FILT_NUM_FST_OPERATIONS)) / (TEX_FILT_NUM_OPERATIONS)) * 100" />
|
||||
<event name="MaliTexFiltIssueCy" title="Texture filtering cycles" description="The number of texture filtering issue cycles." units="cycles" counter="TEX_FILT_NUM_OPERATIONS" offset="39" />
|
||||
<event name="MaliTexFiltStallCy" title="Texture filtering stall cycles" description="The number of cycles when the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" offset="38" />
|
||||
<event name="MaliTexFullBiFiltCy" title="Texture filtering cycles using full bilinear" description="The number of cycles when the filtering unit is filled with bilinear filtering." units="cycles" counter="TEX_FILT_NUM_FXR_OPERATIONS" offset="40" />
|
||||
<event name="MaliTexFullTriFiltCy" title="Texture filtering cycles using full trilinear" description="The number of cycles when the filtering unit is filled with trilinear filtering." units="cycles" counter="TEX_FILT_NUM_FST_OPERATIONS" offset="41" />
|
||||
<event name="MaliTexInBt" title="Texture message read beats" description="The number of texture request message data beats." units="beats" counter="TEX_MSGI_NUM_FLITS" offset="35" />
|
||||
<event name="MaliTexInBusUtil" title="Texture input bus utilization" description="The percentage load on the texture message input bus." units="percent" equation="((TEX_MSGI_NUM_FLITS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexIssueCy" title="Texture unit issue cycles" description="The number of cycles the texture unit was busy." units="cycles" equation="(TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliTexOutBt" title="Texture message write beats" description="The number of texture response message data beats." units="beats" counter="TEX_MSGO_NUM_FLITS" offset="43" />
|
||||
<event name="MaliTexOutBusUtil" title="Texture output bus utilization" description="The percentage load on the texture message output bus." units="percent" equation="((TEX_MSGO_NUM_FLITS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexOutMsg" title="Texture messages" description="The number of output texture messages from the texture unit." units="issues" counter="TEX_MSGO_NUM_MSG" offset="42" />
|
||||
<event name="MaliTexQuads" title="Texture quads" description="The number of quad-width texture operations processed by the texture unit." units="quads" equation="(TEX_MSGO_NUM_MSG)" />
|
||||
<event name="MaliTexSample" title="Texture samples" description="The number of texture samples made." units="requests" equation="((TEX_MSGO_NUM_MSG)) * 4" />
|
||||
<event name="MaliTexUtil" title="Texture unit utilization" description="The percentage utilization of the texturing unit." units="percent" equation="(((TEX_FILT_NUM_OPERATIONS)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliVar16IssueCy" title="16-bit interpolation issue cycles" description="The number of 16-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_16) / (1)" />
|
||||
<event name="MaliVar16IssueSlot" title="16-bit interpolation slots" description="The number of 16-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_16" offset="51" />
|
||||
<event name="MaliVar32IssueCy" title="32-bit interpolation issue cycles" description="The number of 32-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_32) / (1)" />
|
||||
<event name="MaliVar32IssueSlot" title="32-bit interpolation slots" description="The number of 32-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_32" offset="50" />
|
||||
<event name="MaliVarInstr" title="Varying unit instructions" description="The number of warp-width interpolation operations processed by the varying unit." units="requests" counter="VARY_INSTR" offset="49" />
|
||||
<event name="MaliVarIssueCy" title="Varying unit issue cycles" description="The total number of varying unit issue cycles." units="cycles" equation="((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))" />
|
||||
<event name="MaliVarUtil" title="Varying unit utilization" description="The percentage utilization of the varying unit." units="percent" equation="((((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
</category>
|
||||
<category name="Tiler">
|
||||
<event name="MaliGeomBackFacePrim" title="Visible back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" counter="BACK_FACING" offset="10" />
|
||||
<event name="MaliGeomFaceXYPlaneCullPrim" title="Facing or XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" counter="PRIM_CULLED" offset="12" />
|
||||
<event name="MaliGeomFaceXYPlaneCullRate" title="Facing or XY plane test cull percentage" description="The percentage of primitives culled by the facing or frustum XY plane tests." units="percent" equation="((PRIM_CULLED) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomFrontFacePrim" title="Visible front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" counter="FRONT_FACING" offset="9" />
|
||||
<event name="MaliGeomLinePrim" title="Line primitives" description="The number of input line primitives." units="primitives" counter="LINES" offset="7" />
|
||||
<event name="MaliGeomPointPrim" title="Point primitives" description="The number of input point primitives." units="primitives" counter="POINTS" offset="8" />
|
||||
<event name="MaliGeomPosShadTask" title="Tiler position shading requests" description="The number of position shading requests in the tiler geometry flow." units="requests" counter="IDVS_POS_SHAD_REQ" offset="21" />
|
||||
<event name="MaliGeomPosShadThread" title="Position shader thread invocations" description="The number of position shader thread invocations." units="threads" equation="(IDVS_POS_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomPosShadThreadPerPrim" title="Position threads per input primitive" description="The number of position shader invocations per input primitive." units="threads" equation="((IDVS_POS_SHAD_REQ) * (4)) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))" />
|
||||
<event name="MaliGeomSampleCullPrim" title="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" counter="PRIM_SAT_CULLED" offset="14" />
|
||||
<event name="MaliGeomSampleCullRate" title="Sample test cull percentage" description="The percentage of primitives culled by the sample coverage test." units="percent" equation="((PRIM_SAT_CULLED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED) - (PRIM_CLIPPED))) * 100" />
|
||||
<event name="MaliGeomTotalCullPrim" title="Culled primitives" description="The number of primitives that were culled during the rendering process." units="primitives" equation="(PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)" />
|
||||
<event name="MaliGeomTotalPrim" title="Total input primitives" description="The total number of input primitives to the rendering process." units="primitives" equation="((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomTrianglePrim" title="Triangle primitives" description="The number of input triangle primitives." units="primitives" counter="TRIANGLES" offset="6" />
|
||||
<event name="MaliGeomVarShadTask" title="Tiler varying shading requests" description="The number of varying shading requests in the tiler geometry flow." units="requests" counter="IDVS_VAR_SHAD_REQ" offset="37" />
|
||||
<event name="MaliGeomVarShadThread" title="Varying shader thread invocations" description="The number of varying shader thread invocations." units="threads" equation="(IDVS_VAR_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomVarShadThreadPerPrim" title="Varying threads per input primitive" description="The number of varying shader invocations per visible primitive." units="threads" equation="((IDVS_VAR_SHAD_REQ) * (4)) / (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomVisiblePrim" title="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" counter="PRIM_VISIBLE" offset="11" />
|
||||
<event name="MaliGeomVisibleRate" title="Visible primitive percentage" description="The percentage of primitives that are visible after culling." units="percent" equation="((PRIM_VISIBLE) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomZPlaneCullPrim" title="Z plane culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" counter="PRIM_CLIPPED" offset="13" />
|
||||
<event name="MaliGeomZPlaneCullRate" title="Z plane test cull percentage" description="The percentage of primitives culled by the frustum Z plane test." units="percent" equation="((PRIM_CLIPPED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED))) * 100" />
|
||||
<event name="MaliTilerActiveCy" title="Tiler active cycles" description="The number of cycles when the tiler has a workload queued for processing." units="cycles" counter="TILER_ACTIVE" offset="4" />
|
||||
<event name="MaliTilerPosCacheHit" title="Position cache hit requests" description="The number of position lookups that result in a hit in the vertex cache." units="requests" counter="VCACHE_HIT" offset="26" />
|
||||
<event name="MaliTilerPosCacheHitRate" title="Position cache hit percentage" description="The percentage hit rate of the tiler position cache." units="percent" equation="((VCACHE_HIT) / ((VCACHE_HIT) + (VCACHE_MISS))) * 100" />
|
||||
<event name="MaliTilerPosCacheMiss" title="Position cache miss requests" description="The number of position lookups that miss in the vertex cache." units="requests" counter="VCACHE_MISS" offset="27" />
|
||||
<event name="MaliTilerPosShadFIFOFullCy" title="Tiler position FIFO full cycles" description="The number of cycles when the tiler has a stalled position shading buffer." units="cycles" counter="IDVS_POS_FIFO_FULL" offset="24" />
|
||||
<event name="MaliTilerPosShadStallCy" title="Tiler position shading stall cycles" description="The number of cycles when the tiler has a stalled position shading request." units="cycles" counter="IDVS_POS_SHAD_STALL" offset="23" />
|
||||
<event name="MaliTilerRdBt" title="Output internal read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" counter="BUS_READ" offset="17" />
|
||||
<event name="MaliTilerUtil" title="Tiler utilization" description="The percentage of GPU active cycles when the tiler has a workload queued for processing." units="percent" equation="((TILER_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliTilerVarCacheHit" title="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" counter="IDVS_VBU_HIT" offset="34" />
|
||||
<event name="MaliTilerVarCacheHitRate" title="Varying cache hit percentage" description="The percentage hit rate of the tiler varying cache." units="percent" equation="((IDVS_VBU_HIT) / ((IDVS_VBU_HIT) + (IDVS_VBU_MISS))) * 100" />
|
||||
<event name="MaliTilerVarCacheMiss" title="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" counter="IDVS_VBU_MISS" offset="35" />
|
||||
<event name="MaliTilerVarShadStallCy" title="Tiler varying shading stall cycles" description="The number of cycles when the tiler has a stalled varying shading request." units="cycles" counter="IDVS_VAR_SHAD_STALL" offset="38" />
|
||||
</category>
|
||||
</metrics>
|
||||
261
src/panfrost/perf/generated/G78.xml
Normal file
261
src/panfrost/perf/generated/G78.xml
Normal file
|
|
@@ -0,0 +1,261 @@
|
|||
<!--
|
||||
Copyright (c) 2026 Arm, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
Generated from libGPUCounters @ 4ea7c0127fe2942a00e4a1123bb62c625a401f93.
|
||||
https://github.com/ARM-software/libGPUCounters
|
||||
which is:
|
||||
Copyright (c) 2023-2025 Arm Limited
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
|
||||
<metrics id="G78">
|
||||
<category name="GPU Front-end">
|
||||
<event name="MaliFragQueueActiveCy" title="Fragment queue active cycles" description="The number of cycles when work is queued for processing in the GPU fragment queue." units="cycles" counter="JS0_ACTIVE" offset="10" />
|
||||
<event name="MaliFragQueueJob" title="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" counter="JS0_JOBS" offset="8" />
|
||||
<event name="MaliFragQueueTask" title="Fragment tasks" description="The number of fragment tasks processed." units="tasks" counter="JS0_TASKS" offset="9" />
|
||||
<event name="MaliFragQueueUtil" title="Fragment queue utilization" description="The fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS0_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliFragQueueWaitDepCy" title="Fragment queue job dependency wait cycles" description="The number of cycles when queued fragment work is waiting for dependent work to complete." units="cycles" counter="JS0_WAIT_DEPEND" offset="14" />
|
||||
<event name="MaliFragQueueWaitFinishCy" title="Fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued fragment work to complete." units="cycles" counter="JS0_WAIT_FINISH" offset="15" />
|
||||
<event name="MaliFragQueueWaitFlushCy" title="Fragment queue cache flush wait cycles" description="The number of cycles when queued fragment work is waiting for a cache flush." units="cycles" counter="JS0_WAIT_FLUSH" offset="11" />
|
||||
<event name="MaliFragQueueWaitIssueCy" title="Fragment queue job issue wait cycles" description="The number of cycles when queued fragment work is waiting for an available processor." units="cycles" counter="JS0_WAIT_ISSUE" offset="13" />
|
||||
<event name="MaliFragQueueWaitRdCy" title="Fragment queue job descriptor read wait cycles" description="The number of cycles when queued fragment work is waiting for a descriptor load." units="cycles" counter="JS0_WAIT_READ" offset="12" />
|
||||
<event name="MaliGPUActiveCy" title="GPU active cycles" description="The number of cycles when the GPU has a workload of any type queued for processing." units="cycles" counter="GPU_ACTIVE" offset="6" />
|
||||
<event name="MaliGPUIRQActiveCy" title="GPU interrupt pending cycles" description="The number of cycles when the GPU has a pending interrupt." units="cycles" counter="IRQ_ACTIVE" offset="7" />
|
||||
<event name="MaliGPUIRQUtil" title="Interrupt pending utilization" description="The IRQ pending utilization compared against the GPU active cycles." units="percent" equation="((IRQ_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueActiveCy" title="Non-fragment queue active cycles" description="The number of cycles when work is queued in the GPU non-fragment queue." units="cycles" counter="JS1_ACTIVE" offset="18" />
|
||||
<event name="MaliNonFragQueueJob" title="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" counter="JS1_JOBS" offset="16" />
|
||||
<event name="MaliNonFragQueueTask" title="Non-fragment tasks" description="The number of non-fragment tasks processed." units="tasks" counter="JS1_TASKS" offset="17" />
|
||||
<event name="MaliNonFragQueueUtil" title="Non-fragment queue utilization" description="The non-fragment queue utilization compared against the GPU active cycles." units="percent" equation="((JS1_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragQueueWaitDepCy" title="Non-fragment queue job dependency wait cycles" description="The number of cycles when queued non-fragment work is waiting for dependent work to complete." units="cycles" counter="JS1_WAIT_DEPEND" offset="22" />
|
||||
<event name="MaliNonFragQueueWaitFinishCy" title="Non-fragment queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued non-fragment work to complete." units="cycles" counter="JS1_WAIT_FINISH" offset="23" />
|
||||
<event name="MaliNonFragQueueWaitFlushCy" title="Non-fragment queue cache flush wait cycles" description="The number of cycles when queued non-fragment work is waiting for a cache flush." units="cycles" counter="JS1_WAIT_FLUSH" offset="19" />
|
||||
<event name="MaliNonFragQueueWaitIssueCy" title="Non-fragment queue job issue wait cycles" description="The number of cycles when queued non-fragment work is waiting for an available processor." units="cycles" counter="JS1_WAIT_ISSUE" offset="21" />
|
||||
<event name="MaliNonFragQueueWaitRdCy" title="Non-fragment queue job descriptor read wait cycles" description="The number of cycles when queued non-fragment work is waiting for a descriptor load." units="cycles" counter="JS1_WAIT_READ" offset="20" />
|
||||
<event name="MaliResQueueActiveCy" title="Reserved active cycles" description="The number of cycles when work is queued in the GPU reserved queue." units="cycles" counter="JS2_ACTIVE" offset="26" />
|
||||
<event name="MaliResQueueJob" title="Reserved queue jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" counter="JS2_JOBS" offset="24" />
|
||||
<event name="MaliResQueueTask" title="Reserved queue tasks" description="The number of reserved tasks processed." units="tasks" counter="JS2_TASKS" offset="25" />
|
||||
<event name="MaliResQueueWaitDepCy" title="Reserved queue job dependency wait cycles" description="The number of cycles when queued reserved work is waiting for dependent work to complete." units="cycles" counter="JS2_WAIT_DEPEND" offset="30" />
|
||||
<event name="MaliResQueueWaitFinishCy" title="Reserved queue job finish wait cycles" description="The number of cycles when the GPU is waiting for issued reserved work to complete." units="cycles" counter="JS2_WAIT_FINISH" offset="31" />
|
||||
<event name="MaliResQueueWaitFlushCy" title="Reserved queue cache flush wait cycles" description="The number of cycles when queued reserved work is waiting for a cache flush." units="cycles" counter="JS2_WAIT_FLUSH" offset="27" />
|
||||
<event name="MaliResQueueWaitIssueCy" title="Reserved queue job issue wait cycles" description="The number of cycles when queued reserved work is waiting for an available processor." units="cycles" counter="JS2_WAIT_ISSUE" offset="29" />
|
||||
<event name="MaliResQueueWaitRdCy" title="Reserved queue job descriptor read wait cycles" description="The number of cycles when queued reserved work is waiting for a descriptor load." units="cycles" counter="JS2_WAIT_READ" offset="28" />
|
||||
</category>
|
||||
<category name="Memory System">
|
||||
<event name="MaliExtBusRd" title="Output external read transactions" description="The number of external read transactions." units="transactions" counter="L2_EXT_READ" offset="29" />
|
||||
<event name="MaliExtBusRdBt" title="Output external read beats" description="The number of external bus data read cycles." units="beats" counter="L2_EXT_READ_BEATS" offset="32" />
|
||||
<event name="MaliExtBusRdBy" title="Output external read bytes" description="The total output read bandwidth for the GPU." units="bytes" equation="(L2_EXT_READ_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusRdLat0" title="Output external read latency 0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" counter="L2_EXT_RRESP_0_127" offset="37" />
|
||||
<event name="MaliExtBusRdLat128" title="Output external read latency 128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" counter="L2_EXT_RRESP_128_191" offset="38" />
|
||||
<event name="MaliExtBusRdLat192" title="Output external read latency 192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" counter="L2_EXT_RRESP_192_255" offset="39" />
|
||||
<event name="MaliExtBusRdLat256" title="Output external read latency 256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" counter="L2_EXT_RRESP_256_319" offset="40" />
|
||||
<event name="MaliExtBusRdLat320" title="Output external read latency 320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" counter="L2_EXT_RRESP_320_383" offset="41" />
|
||||
<event name="MaliExtBusRdLat384" title="Output external read latency 384+ cycles" description="The number of read beats that are returned at least 384 cycles after the transaction started." units="beats" equation="(L2_EXT_READ_BEATS) - (L2_EXT_RRESP_0_127) - (L2_EXT_RRESP_128_191) - (L2_EXT_RRESP_192_255) - (L2_EXT_RRESP_256_319) - (L2_EXT_RRESP_320_383)" />
|
||||
<event name="MaliExtBusRdNoSnoop" title="Output external ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" counter="L2_EXT_READ_NOSNP" offset="30" />
|
||||
<event name="MaliExtBusRdOTQ1" title="Output external outstanding reads 0-25%" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q1" offset="34" />
|
||||
<event name="MaliExtBusRdOTQ2" title="Output external outstanding reads 25-50%" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q2" offset="35" />
|
||||
<event name="MaliExtBusRdOTQ3" title="Output external outstanding reads 50-75%" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AR_CNT_Q3" offset="36" />
|
||||
<event name="MaliExtBusRdOTQ4" title="Output external outstanding reads 75-100%" description="The number of read transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_READ) - (L2_EXT_AR_CNT_Q1) - (L2_EXT_AR_CNT_Q2) - (L2_EXT_AR_CNT_Q3)" />
|
||||
<event name="MaliExtBusRdStallCy" title="Output external read stall cycles" description="The number of cycles when a read is stalled waiting for the external bus." units="cycles" counter="L2_EXT_AR_STALL" offset="33" />
|
||||
<event name="MaliExtBusRdStallRate" title="Output external read stall percentage" description="The percentage of cycles with an external read transaction stalled." units="percent" equation="((L2_EXT_AR_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliExtBusRdUnique" title="Output external ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" counter="L2_EXT_READ_UNIQUE" offset="31" />
|
||||
<event name="MaliExtBusWr" title="Output external write transactions" description="The number of external write transactions." units="transactions" counter="L2_EXT_WRITE" offset="42" />
|
||||
<event name="MaliExtBusWrBt" title="Output external write beats" description="The number of external bus data write cycles." units="beats" counter="L2_EXT_WRITE_BEATS" offset="47" />
|
||||
<event name="MaliExtBusWrBy" title="Output external write bytes" description="The total output write bandwidth for the GPU." units="bytes" equation="(L2_EXT_WRITE_BEATS) * (MALI_CONFIG_EXT_BUS_BYTE_SIZE)" />
|
||||
<event name="MaliExtBusWrNoSnoopFull" title="Output external WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_FULL" offset="43" />
|
||||
<event name="MaliExtBusWrNoSnoopPart" title="Output external WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_NOSNP_PTL" offset="44" />
|
||||
<event name="MaliExtBusWrOTQ1" title="Output external outstanding writes 0-25%" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q1" offset="49" />
|
||||
<event name="MaliExtBusWrOTQ2" title="Output external outstanding writes 25-50%" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q2" offset="50" />
|
||||
<event name="MaliExtBusWrOTQ3" title="Output external outstanding writes 50-75%" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" counter="L2_EXT_AW_CNT_Q3" offset="51" />
|
||||
<event name="MaliExtBusWrOTQ4" title="Output external outstanding writes 75-100%" description="The number of write transactions initiated when 75-100% of transaction IDs are in use." units="transactions" equation="(L2_EXT_WRITE) - (L2_EXT_AW_CNT_Q1) - (L2_EXT_AW_CNT_Q2) - (L2_EXT_AW_CNT_Q3)" />
|
||||
<event name="MaliExtBusWrSnoopFull" title="Output external WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_FULL" offset="45" />
|
||||
<event name="MaliExtBusWrSnoopPart" title="Output external WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" counter="L2_EXT_WRITE_SNP_PTL" offset="46" />
|
||||
<event name="MaliExtBusWrStallCy" title="Output external write stall cycles" description="The number of cycles when a write is stalled waiting for the external bus." units="cycles" counter="L2_EXT_W_STALL" offset="48" />
|
||||
<event name="MaliExtBusWrStallRate" title="Output external write stall percentage" description="The percentage of cycles with an external write transaction stalled." units="percent" equation="((L2_EXT_W_STALL) / (MALI_CONFIG_L2_CACHE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliL2CacheFlush" title="L2 cache flush requests" description="The number of GPU L2 cache flushes performed." units="requests" counter="CACHE_FLUSH" offset="63" />
|
||||
<event name="MaliL2CacheIncSnp" title="Input external snoop transactions" description="The number of coherency snoops triggered by external requesters." units="transactions" counter="L2_EXT_SNOOP" offset="52" />
|
||||
<event name="MaliL2CacheIncSnpStallCy" title="Input external snoop stall cycles" description="The number of cycles when a coherency snoop triggered by an external requester is stalled." units="cycles" counter="L2_EXT_SNOOP_STALL" offset="53" />
|
||||
<event name="MaliL2CacheL1Rd" title="Output internal read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal requester." units="requests" counter="L2_RD_MSG_OUT" offset="22" />
|
||||
<event name="MaliL2CacheL1RdStallCy" title="Output internal read stall cycles" description="The number of cycles when L1 cache read requests sent by the L2 cache to an internal requester are stalled." units="cycles" counter="L2_RD_MSG_OUT_STALL" offset="23" />
|
||||
<event name="MaliL2CacheL1Wr" title="Output internal write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal requester." units="requests" counter="L2_WR_MSG_OUT" offset="24" />
|
||||
<event name="MaliL2CacheLookup" title="Any lookup requests" description="The number of L2 cache lookups performed." units="requests" counter="L2_ANY_LOOKUP" offset="25" />
|
||||
<event name="MaliL2CacheRd" title="Input internal read requests" description="The number of L2 cache read requests from internal requesters." units="requests" counter="L2_RD_MSG_IN" offset="16" />
|
||||
<event name="MaliL2CacheRdLookup" title="Read lookup requests" description="The number of L2 cache read lookups performed." units="requests" counter="L2_READ_LOOKUP" offset="26" />
|
||||
<event name="MaliL2CacheRdMissRate" title="L2 cache read miss percentage" description="The percentage of internal L2 cache reads that result in an external read." units="percent" equation="((L2_EXT_READ) / (L2_READ_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheRdStallCy" title="Input internal read stall cycles" description="The number of cycles when L2 cache read requests from internal requesters are stalled." units="cycles" counter="L2_RD_MSG_IN_STALL" offset="17" />
|
||||
<event name="MaliL2CacheSnp" title="Input internal snoop requests" description="The number of L2 snoop requests from internal requesters." units="requests" counter="L2_SNP_MSG_IN" offset="20" />
|
||||
<event name="MaliL2CacheSnpLookup" title="Input external snoop lookup requests" description="The number of coherency snoop lookups performed that were triggered by an external requester." units="requests" counter="L2_EXT_SNOOP_LOOKUP" offset="28" />
|
||||
<event name="MaliL2CacheSnpStallCy" title="Input internal snoop stall cycles" description="The number of cycles when L2 cache snoop requests from internal requesters are stalled." units="cycles" counter="L2_SNP_MSG_IN_STALL" offset="21" />
|
||||
<event name="MaliL2CacheWr" title="Input internal write requests" description="The number of L2 cache write requests from internal requesters." units="requests" counter="L2_WR_MSG_IN" offset="18" />
|
||||
<event name="MaliL2CacheWrLookup" title="Write lookup requests" description="The number of L2 cache write lookups performed." units="requests" counter="L2_WRITE_LOOKUP" offset="27" />
|
||||
<event name="MaliL2CacheWrMissRate" title="L2 cache write miss percentage" description="The percentage of internal L2 cache writes that result in an external write." units="percent" equation="((L2_EXT_WRITE) / (L2_WRITE_LOOKUP)) * 100" />
|
||||
<event name="MaliL2CacheWrStallCy" title="Input internal write stall cycles" description="The number of cycles when L2 cache write requests from internal requesters are stalled." units="cycles" counter="L2_WR_MSG_IN_STALL" offset="19" />
|
||||
<event name="MaliMMUL2Hit" title="MMU L2 lookup TLB hits" description="The number of level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L2" offset="8" />
|
||||
<event name="MaliMMUL2Rd" title="MMU L2 table read requests" description="The number of level 2 translation table reads." units="requests" counter="MMU_TABLE_READS_L2" offset="6" />
|
||||
<event name="MaliMMUL3Hit" title="MMU L3 lookup TLB hits" description="The number of level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_HIT_L3" offset="7" />
|
||||
<event name="MaliMMUL3Rd" title="MMU L3 table read requests" description="The number of level 3 translation table reads." units="requests" counter="MMU_TABLE_READS_L3" offset="5" />
|
||||
<event name="MaliMMULookup" title="MMU lookup requests" description="The number of main MMU address translations performed." units="requests" counter="MMU_REQUESTS" offset="4" />
|
||||
<event name="MaliMMUS2L2Hit" title="MMU stage 2 L2 lookup TLB hits" description="The number of stage 2 level 2 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L2" offset="13" />
|
||||
<event name="MaliMMUS2L2Rd" title="MMU stage 2 L2 lookup requests" description="The number of stage 2 level 2 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L2" offset="11" />
|
||||
<event name="MaliMMUS2L3Hit" title="MMU stage 2 L3 lookup TLB hits" description="The number of stage 2 level 3 translation table reads that hit in the main MMU TLB." units="requests" counter="MMU_S2_HIT_L3" offset="12" />
|
||||
<event name="MaliMMUS2L3Rd" title="MMU stage 2 L3 lookup requests" description="The number of stage 2 level 3 translation table reads." units="requests" counter="MMU_S2_TABLE_READS_L3" offset="10" />
|
||||
<event name="MaliMMUS2Lookup" title="MMU stage 2 lookup requests" description="The number of main MMU stage 2 address translations performed." units="requests" counter="MMU_S2_REQUESTS" offset="9" />
|
||||
</category>
|
||||
<category name="Shader Core">
|
||||
<event name="MaliALUIssueCy" title="Arithmetic unit issue cycles" description="The number of cycles the arithmetic unit was busy." units="cycles" equation="max((EXEC_INSTR_FMA), (EXEC_INSTR_CVT), (EXEC_INSTR_SFU) * 4)" />
|
||||
<event name="MaliALUUtil" title="Arithmetic unit utilization" description="The percentage utilization of the arithmetic unit." units="percent" equation="((max((EXEC_INSTR_FMA), (EXEC_INSTR_CVT), (EXEC_INSTR_SFU) * 4)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliAnyActiveCy" title="Any workload active cycles" description="The number of cycles when the shader core is processing either a non-fragment workload or a fragment workload." units="cycles" counter="SHADER_CORE_ACTIVE" offset="53" />
|
||||
<event name="MaliAnyUtil" title="Shader core clock ratio" description="An estimate of shader core use relative to the GPU top-level clock." units="percent" equation="((SHADER_CORE_ACTIVE) / (MALI_CONFIG_SHADER_CORE_COUNT) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliAttrInstr" title="Attribute instructions" description="The number of instructions run by the attribute unit." units="instructions" counter="ATTR_INSTR" offset="52" />
|
||||
<event name="MaliCoreActiveCy" title="Execution core active cycles" description="The number of cycles when the shader core is processing at least one warp." units="cycles" counter="EXEC_CORE_ACTIVE" offset="26" />
|
||||
<event name="MaliCoreAllRegsWarp" title="Warps using more than 32 registers" description="The number of warps that require more than 32 registers." units="warps" counter="WARP_REG_SIZE_64" offset="17" />
|
||||
<event name="MaliCoreAllRegsWarpRate" title="All registers warp percentage" description="The percentage of warps that require more than 32 registers." units="percent" equation="((WARP_REG_SIZE_64) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreFullWarp" title="Full warps" description="The number of warps that have a full thread slot allocation." units="warps" counter="FULL_QUAD_WARPS" offset="21" />
|
||||
<event name="MaliCoreFullWarpRate" title="Full warp percentage" description="The percentage of warps that have a full thread slot allocation." units="percent" equation="((FULL_QUAD_WARPS) / ((COMPUTE_WARPS) + (FRAG_WARPS))) * 100" />
|
||||
<event name="MaliCoreUtil" title="Execution core utilization" description="The utilization of the programmable shader core." units="percent" equation="((EXEC_CORE_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngArithInstr" title="Arithmetic instruction issue cycles" description="The total number of instructions issued to the FMA, CVT, and SFU pipes." units="instructions" equation="(EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU)" />
|
||||
<event name="MaliEngCVTInstr" title="Arithmetic CVT pipe instructions" description="The number of instructions issued to the CVT pipe." units="instructions" counter="EXEC_INSTR_CVT" offset="28" />
|
||||
<event name="MaliEngCVTPipeUtil" title="CVT pipe utilization" description="Defines the utilization of the CVT pipe." units="percent" equation="((EXEC_INSTR_CVT) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngDivergedInstr" title="Diverged instructions" description="The number of instructions run per warp that have control flow divergence." units="instructions" counter="EXEC_INSTR_DIVERGED" offset="31" />
|
||||
<event name="MaliEngDivergedInstrRate" title="Warp divergence percentage" description="The percentage of instructions that have control flow divergence across the warp." units="percent" equation="((EXEC_INSTR_DIVERGED) / ((EXEC_INSTR_FMA) + (EXEC_INSTR_CVT) + (EXEC_INSTR_SFU))) * 100" />
|
||||
<event name="MaliEngFMAInstr" title="Arithmetic FMA pipe instructions" description="The number of instructions issued to the FMA pipe." units="instructions" counter="EXEC_INSTR_FMA" offset="27" />
|
||||
<event name="MaliEngFMAPipeUtil" title="FMA pipe utilization" description="The utilization of the FMA pipe." units="percent" equation="((EXEC_INSTR_FMA) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngICacheMiss" title="Instruction cache misses" description="The number of instruction cache misses." units="requests" counter="EXEC_ICACHE_MISS" offset="32" />
|
||||
<event name="MaliEngSFUInstr" title="Arithmetic SFU pipe instructions" description="The number of instructions issued to the SFU pipe." units="instructions" counter="EXEC_INSTR_SFU" offset="29" />
|
||||
<event name="MaliEngSFUPipeUtil" title="SFU pipe utilization" description="The utilization of the SFU pipe." units="percent" equation="(((EXEC_INSTR_SFU) * 4) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliEngSWBlendInstr" title="Blend shader instructions" description="The number of blend shader invocations run." units="instructions" counter="CALL_BLEND_SHADER" offset="34" />
|
||||
<event name="MaliEngSWBlendRate" title="Shader blend percentage" description="The percentage of fragments that use shader-based blending." units="percent" equation="(((CALL_BLEND_SHADER) * 2) / (FRAG_WARPS)) * 100" />
|
||||
<event name="MaliEngStarveCy" title="Processing unit starvation cycles" description="The number of cycles when the processing unit is starved of work." units="cycles" counter="EXEC_STARVE_ARITH" offset="33" />
|
||||
<event name="MaliFragActiveCy" title="Fragment active cycles" description="The number of cycles when the shader core is processing a fragment workload." units="cycles" counter="FRAG_ACTIVE" offset="4" />
|
||||
<event name="MaliFragEZSKillQd" title="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_KILL" offset="14" />
|
||||
<event name="MaliFragEZSKillRate" title="Early ZS killed quad percentage" description="The percentage of rasterized quads that are killed by early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSTestQd" title="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" counter="FRAG_QUADS_EZS_TEST" offset="12" />
|
||||
<event name="MaliFragEZSTestRate" title="Early ZS tested quad percentage" description="The percentage of rasterized quads that were subjected to early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragEZSUpdateQd" title="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" counter="FRAG_QUADS_EZS_UPDATE" offset="13" />
|
||||
<event name="MaliFragEZSUpdateRate" title="Early ZS updated quad percentage" description="The percentage of rasterized quads that update the framebuffer during early depth and stencil testing." units="percent" equation="((FRAG_QUADS_EZS_UPDATE) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragFPKActiveCy" title="Fragment pre-pipe buffer active cycles" description="The number of cycles when at least one quad is present in the pre-pipe quad queue." units="cycles" counter="FRAG_FPK_ACTIVE" offset="7" />
|
||||
<event name="MaliFragFPKBUtil" title="Fragment pre-pipe buffer utilization" description="The percentage of cycles when at least one quad is buffered for fragment shading." units="percent" equation="((FRAG_FPK_ACTIVE) / (FRAG_ACTIVE)) * 100" />
|
||||
<event name="MaliFragFPKKillQd" title="FPK HSR killed quads" description="The number of quads that are killed by hidden surface removal." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (((FRAG_WARPS) * (16)) / 4)" />
|
||||
<event name="MaliFragFPKKillRate" title="FPK HSR killed quad percentage" description="The percentage of rasterized quads that are killed by hidden surface removal." units="percent" equation="(((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (((FRAG_WARPS) * (16)) / 4)) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSKillQd" title="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" counter="FRAG_LZS_KILL" offset="16" />
|
||||
<event name="MaliFragLZSKillRate" title="Late ZS killed quad percentage" description="The percentage of rasterized quads that are killed by late depth and stencil testing." units="percent" equation="((FRAG_LZS_KILL) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragLZSTestQd" title="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" counter="FRAG_LZS_TEST" offset="15" />
|
||||
<event name="MaliFragLZSTestRate" title="Late ZS tested quad percentage" description="The percentage of rasterized quads that are tested by late depth and stencil testing." units="percent" equation="((FRAG_LZS_TEST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragOpaqueQd" title="Occluding quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" counter="QUAD_FPK_KILLER" offset="20" />
|
||||
<event name="MaliFragOpaqueQdRate" title="Occluding quad percentage" description="The percentage of quads that are valid occluders for hidden surface removal." units="percent" equation="((QUAD_FPK_KILLER) / ((FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL))) * 100" />
|
||||
<event name="MaliFragOverdraw" title="Fragments per pixel" description="The number of fragments shaded per output pixel." units="threads" equation="((FRAG_WARPS) * (16)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliFragRastPartQd" title="Partial rasterized fine quads" description="The number of rasterized fine quads created with partial coverage." units="quads" counter="FRAG_PARTIAL_QUADS_RAST" offset="10" />
|
||||
<event name="MaliFragRastPartQdRate" title="Partial coverage percentage" description="The percentage of rasterized fine quads that contain samples with no coverage." units="percent" equation="((FRAG_PARTIAL_QUADS_RAST) / (FRAG_QUADS_RAST)) * 100" />
|
||||
<event name="MaliFragRastPrim" title="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" counter="FRAG_PRIM_RAST" offset="6" />
|
||||
<event name="MaliFragRastQd" title="Rasterized fine quads" description="The number of fine quads generated by the rasterization phase." units="quads" counter="FRAG_QUADS_RAST" offset="11" />
|
||||
<event name="MaliFragRdPrim" title="Fragment primitives loaded" description="The number of primitives loaded from the tile list by the fragment front-end." units="primitives" counter="FRAG_PRIMITIVES_OUT" offset="5" />
|
||||
<event name="MaliFragShadedQd" title="Shaded coarse quads" description="The number of 2x2 fragment quads that are fragment shaded." units="quads" equation="((FRAG_WARPS) * (16)) / 4" />
|
||||
<event name="MaliFragThread" title="Fragment threads" description="The number of fragment threads started." units="threads" equation="(FRAG_WARPS) * (16)" />
|
||||
<event name="MaliFragThroughputCy" title="Average cycles per fragment thread" description="The average number of shader core cycles per fragment thread." units="cycles" equation="(FRAG_ACTIVE) / ((FRAG_WARPS) * (16))" />
|
||||
<event name="MaliFragTile" title="Tiles" description="The number of tiles processed by the shader core." units="tiles" counter="FRAG_PTILES" offset="18" />
|
||||
<event name="MaliFragTileKill" title="Killed unchanged tiles" description="The number of tiles killed by transaction elimination." units="tiles" counter="FRAG_TRANS_ELIM" offset="19" />
|
||||
<event name="MaliFragTileKillRate" title="Unchanged tile kill percentage" description="The percentage of tiles that are killed by transaction elimination." units="percent" equation="((FRAG_TRANS_ELIM) / (FRAG_PTILES)) * 100" />
|
||||
<event name="MaliFragTransparentQd" title="Non-occluding quads" description="The number of quads that are not eligible to be a hidden surface removal occluder." units="quads" equation="(FRAG_QUADS_RAST) - (FRAG_QUADS_EZS_KILL) - (QUAD_FPK_KILLER)" />
|
||||
<event name="MaliFragUtil" title="Fragment utilization" description="The utilization of the shader core fragment path." units="percent" equation="((FRAG_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliFragWarp" title="Fragment warps" description="The number of fragment warps created." units="warps" counter="FRAG_WARPS" offset="9" />
|
||||
<event name="MaliLSAtomic" title="Load/store unit atomic issues" description="The number of load/store atomic accesses." units="cycles" counter="LS_MEM_ATOMIC" offset="48" />
|
||||
<event name="MaliLSFullRd" title="Load/store unit full read issues" description="The number of full-width load/store cache reads." units="cycles" counter="LS_MEM_READ_FULL" offset="44" />
|
||||
<event name="MaliLSFullWr" title="Load/store unit full write issues" description="The number of full-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_FULL" offset="46" />
|
||||
<event name="MaliLSIssueCy" title="Load/store unit issue cycles" description="The total number of load/store issue cycles." units="cycles" equation="((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)" />
|
||||
<event name="MaliLSPartRd" title="Load/store unit partial read issues" description="The number of partial-width load/store cache reads." units="cycles" counter="LS_MEM_READ_SHORT" offset="45" />
|
||||
<event name="MaliLSPartWr" title="Load/store unit partial write issues" description="The number of partial-width load/store cache writes." units="cycles" counter="LS_MEM_WRITE_SHORT" offset="47" />
|
||||
<event name="MaliLSRdCy" title="Load/store unit read issues" description="The total number of load/store read cycles." units="cycles" equation="(LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)" />
|
||||
<event name="MaliLSUtil" title="Load/store unit utilization" description="The percentage utilization of the load/store unit." units="percent" equation="((((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT)) + ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)) + (LS_MEM_ATOMIC)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliLSWrCy" title="Load/store unit write issues" description="The total number of load/store write cycles." units="cycles" equation="(LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT)" />
|
||||
<event name="MaliNonFragActiveCy" title="Non-fragment active cycles" description="The number of cycles when the shader core is processing some non-fragment workload." units="cycles" counter="COMPUTE_ACTIVE" offset="22" />
|
||||
<event name="MaliNonFragTask" title="Non-fragment core tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" counter="COMPUTE_TASKS" offset="23" />
|
||||
<event name="MaliNonFragThread" title="Non-fragment threads" description="The number of non-fragment threads started." units="threads" equation="(COMPUTE_WARPS) * (16)" />
|
||||
<event name="MaliNonFragThroughputCy" title="Average cycles per non-fragment thread" description="The average number of shader core cycles per non-fragment thread." units="cycles" equation="(COMPUTE_ACTIVE) / ((COMPUTE_WARPS) * (16))" />
|
||||
<event name="MaliNonFragUtil" title="Non-fragment utilization" description="The utilization of the shader core non-fragment path." units="percent" equation="((COMPUTE_ACTIVE) / (SHADER_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliNonFragWarp" title="Non-fragment warps" description="The number of non-fragment warps created." units="warps" counter="COMPUTE_WARPS" offset="24" />
|
||||
<event name="MaliSCBusFFEExtRdBt" title="Fragment front-end read beats from external memory" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_FTC_EXT" offset="55" />
|
||||
<event name="MaliSCBusFFEExtRdBy" title="Fragment front-end read bytes from external memory" description="The total number of bytes read from the external memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC_EXT) * (16)" />
|
||||
<event name="MaliSCBusFFEL2RdBt" title="Fragment front-end read beats from L2 cache" description="The number of read beats received by the fixed-function fragment front-end." units="beats" counter="BEATS_RD_FTC" offset="54" />
|
||||
<event name="MaliSCBusFFEL2RdBy" title="Fragment front-end read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the shader core fragment front-end." units="bytes" equation="(BEATS_RD_FTC) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdBt" title="Load/store unit read beats from external memory" description="The number of read beats received by the load/store unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_LSC_EXT" offset="57" />
|
||||
<event name="MaliSCBusLSExtRdBy" title="Load/store unit read bytes from external memory" description="The total number of bytes read from the external memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC_EXT) * (16)" />
|
||||
<event name="MaliSCBusLSExtRdByPerRd" title="Load/store unit bytes read from external memory per access cycle" description="The average number of bytes read from the external memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC_EXT) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSL2RdBt" title="Load/store unit read beats from L2 cache" description="The number of read beats received by the load/store unit." units="beats" counter="BEATS_RD_LSC" offset="56" />
|
||||
<event name="MaliSCBusLSL2RdBy" title="Load/store unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the load/store unit." units="bytes" equation="(BEATS_RD_LSC) * (16)" />
|
||||
<event name="MaliSCBusLSL2RdByPerRd" title="Load/store unit bytes read from L2 per access cycle" description="The average number of bytes read from the L2 memory system by the load/store unit per read cycle." units="bytes" equation="((BEATS_RD_LSC) * (16)) / ((LS_MEM_READ_FULL) + (LS_MEM_READ_SHORT))" />
|
||||
<event name="MaliSCBusLSOtherWrBt" title="Load/store unit other write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of any reason other than write-back." units="beats" counter="BEATS_WR_LSC_OTHER" offset="61" />
|
||||
<event name="MaliSCBusLSWBWrBt" title="Load/store unit write-back write beats to L2 memory system" description="The number of write beats by the load/store unit that are because of write-back." units="beats" counter="BEATS_WR_LSC_WB" offset="63" />
|
||||
<event name="MaliSCBusLSWrBt" title="Load/store unit write beats to L2 memory system" description="The number of write beats sent by the load/store unit." units="beats" equation="(BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)" />
|
||||
<event name="MaliSCBusLSWrBy" title="Load/store unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the load/store unit." units="bytes" equation="((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)" />
|
||||
<event name="MaliSCBusLSWrByPerWr" title="Load/store unit bytes written to L2 per access cycle" description="The average number of bytes written to the L2 memory system by the load/store unit per write cycle." units="bytes" equation="(((BEATS_WR_LSC_WB) + (BEATS_WR_LSC_OTHER)) * (16)) / ((LS_MEM_WRITE_FULL) + (LS_MEM_WRITE_SHORT))" />
|
||||
<event name="MaliSCBusOtherL2RdBt" title="Miscellaneous read beats from L2 cache" description="The number of read beats received by a unit that is not specifically identified." units="beats" counter="BEATS_RD_OTHER" offset="60" />
|
||||
<event name="MaliSCBusTexExtRdBt" title="Texture unit read beats from external memory" description="The number of read beats received by the texture unit that required an external memory access because of an L2 cache miss." units="beats" counter="BEATS_RD_TEX_EXT" offset="59" />
|
||||
<event name="MaliSCBusTexExtRdBy" title="Texture unit read bytes from external memory" description="The total number of bytes read from the external memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX_EXT) * (16)" />
|
||||
<event name="MaliSCBusTexExtRdByPerRd" title="Texture unit bytes read from external memory per texture cycle" description="The average number of bytes read from the external memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX_EXT) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTexL2RdBt" title="Texture unit read beats from L2 cache" description="The number of read beats received by the texture unit." units="beats" counter="BEATS_RD_TEX" offset="58" />
|
||||
<event name="MaliSCBusTexL2RdBy" title="Texture unit read bytes from L2 cache" description="The total number of bytes read from the L2 memory system by the texture unit." units="bytes" equation="(BEATS_RD_TEX) * (16)" />
|
||||
<event name="MaliSCBusTexL2RdByPerRd" title="Texture unit bytes read from L2 per texture cycle" description="The average number of bytes read from the L2 memory system by the texture unit per filtering cycle." units="bytes" equation="((BEATS_RD_TEX) * (16)) / (TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliSCBusTileWrBPerPx" title="Tile unit bytes written to L2 per pixel" description="The average number of bytes written to the L2 memory system by the tile unit per output pixel." units="bytes" equation="((BEATS_WR_TIB) * (16)) / ((JS0_TASKS) * (32) * (32))" />
|
||||
<event name="MaliSCBusTileWrBt" title="Tile unit write beats to L2 memory system" description="The number of write beats sent by the tile write-back unit." units="beats" counter="BEATS_WR_TIB" offset="62" />
|
||||
<event name="MaliSCBusTileWrBy" title="Tile unit write bytes to L2 memory system" description="The total number of bytes written to the L2 memory system by the tile write-back unit." units="bytes" equation="(BEATS_WR_TIB) * (16)" />
|
||||
<event name="MaliTexCPI" title="Texture filtering cycles per instruction" description="The average number of texture filtering cycles per instruction." units="cycles" equation="((TEX_FILT_NUM_OPERATIONS)) / (((TEX_MSGO_NUM_MSG)) * 4)" />
|
||||
<event name="MaliTexDataFetchStallCy" title="Texture fetch stall cycles" description="The number of cycles when a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles" counter="TEX_TFCH_CLK_STALLED" offset="37" />
|
||||
<event name="MaliTexDescStallCy" title="Texture descriptor stall cycles" description="The number of cycles when a quad is stalled on texture descriptor fetch." units="cycles" counter="TEX_DFCH_CLK_STALLED" offset="36" />
|
||||
<event name="MaliTexFiltFullRate" title="Texture full speed filtering percentage" description="The percentage of texture filtering cycles using the full width of the texture filtering data path." units="percent" equation="(((TEX_FILT_NUM_FXR_OPERATIONS) + (TEX_FILT_NUM_FST_OPERATIONS)) / (TEX_FILT_NUM_OPERATIONS)) * 100" />
|
||||
<event name="MaliTexFiltIssueCy" title="Texture filtering cycles" description="The number of texture filtering issue cycles." units="cycles" counter="TEX_FILT_NUM_OPERATIONS" offset="39" />
|
||||
<event name="MaliTexFiltStallCy" title="Texture filtering stall cycles" description="The number of cycles when the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" offset="38" />
|
||||
<event name="MaliTexFullBiFiltCy" title="Texture filtering cycles using full bilinear" description="The number of cycles when the filtering unit is filled with bilinear filtering." units="cycles" counter="TEX_FILT_NUM_FXR_OPERATIONS" offset="40" />
|
||||
<event name="MaliTexFullTriFiltCy" title="Texture filtering cycles using full trilinear" description="The number of cycles when the filtering unit is filled with trilinear filtering." units="cycles" counter="TEX_FILT_NUM_FST_OPERATIONS" offset="41" />
|
||||
<event name="MaliTexInBt" title="Texture message read beats" description="The number of texture request message data beats." units="beats" counter="TEX_MSGI_NUM_FLITS" offset="35" />
|
||||
<event name="MaliTexInBusUtil" title="Texture input bus utilization" description="The percentage load on the texture message input bus." units="percent" equation="((TEX_MSGI_NUM_FLITS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexIssueCy" title="Texture unit issue cycles" description="The number of cycles the texture unit was busy." units="cycles" equation="(TEX_FILT_NUM_OPERATIONS)" />
|
||||
<event name="MaliTexOutBt" title="Texture message write beats" description="The number of texture response message data beats." units="beats" counter="TEX_MSGO_NUM_FLITS" offset="43" />
|
||||
<event name="MaliTexOutBusUtil" title="Texture output bus utilization" description="The percentage load on the texture message output bus." units="percent" equation="((TEX_MSGO_NUM_FLITS) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliTexOutMsg" title="Texture messages" description="The number of output texture messages from the texture unit." units="issues" counter="TEX_MSGO_NUM_MSG" offset="42" />
|
||||
<event name="MaliTexQuads" title="Texture quads" description="The number of quad-width texture operations processed by the texture unit." units="quads" equation="(TEX_MSGO_NUM_MSG)" />
|
||||
<event name="MaliTexSample" title="Texture samples" description="The number of texture samples made." units="requests" equation="((TEX_MSGO_NUM_MSG)) * 4" />
|
||||
<event name="MaliTexUtil" title="Texture unit utilization" description="The percentage utilization of the texturing unit." units="percent" equation="(((TEX_FILT_NUM_OPERATIONS)) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
<event name="MaliVar16IssueCy" title="16-bit interpolation issue cycles" description="The number of 16-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_16) / (1)" />
|
||||
<event name="MaliVar16IssueSlot" title="16-bit interpolation slots" description="The number of 16-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_16" offset="51" />
|
||||
<event name="MaliVar32IssueCy" title="32-bit interpolation issue cycles" description="The number of 32-bit interpolation cycles used by the varying unit." units="cycles" equation="(VARY_SLOT_32) / (1)" />
|
||||
<event name="MaliVar32IssueSlot" title="32-bit interpolation slots" description="The number of 32-bit interpolation slots issued by the varying unit." units="issues" counter="VARY_SLOT_32" offset="50" />
|
||||
<event name="MaliVarInstr" title="Varying unit instructions" description="The number of warp-width interpolation operations processed by the varying unit." units="requests" counter="VARY_INSTR" offset="49" />
|
||||
<event name="MaliVarIssueCy" title="Varying unit issue cycles" description="The total number of varying unit issue cycles." units="cycles" equation="((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))" />
|
||||
<event name="MaliVarUtil" title="Varying unit utilization" description="The percentage utilization of the varying unit." units="percent" equation="((((VARY_SLOT_32) / (1)) + ((VARY_SLOT_16) / (1))) / (EXEC_CORE_ACTIVE)) * 100" />
|
||||
</category>
|
||||
<category name="Tiler">
|
||||
<event name="MaliGeomBackFacePrim" title="Visible back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" counter="BACK_FACING" offset="10" />
|
||||
<event name="MaliGeomFaceXYPlaneCullPrim" title="Facing or XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" counter="PRIM_CULLED" offset="12" />
|
||||
<event name="MaliGeomFaceXYPlaneCullRate" title="Facing or XY plane test cull percentage" description="The percentage of primitives culled by the facing or frustum XY plane tests." units="percent" equation="((PRIM_CULLED) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomFrontFacePrim" title="Visible front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" counter="FRONT_FACING" offset="9" />
|
||||
<event name="MaliGeomLinePrim" title="Line primitives" description="The number of input line primitives." units="primitives" counter="LINES" offset="7" />
|
||||
<event name="MaliGeomPointPrim" title="Point primitives" description="The number of input point primitives." units="primitives" counter="POINTS" offset="8" />
|
||||
<event name="MaliGeomPosShadTask" title="Tiler position shading requests" description="The number of position shading requests in the tiler geometry flow." units="requests" counter="IDVS_POS_SHAD_REQ" offset="21" />
|
||||
<event name="MaliGeomPosShadThread" title="Position shader thread invocations" description="The number of position shader thread invocations." units="threads" equation="(IDVS_POS_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomPosShadThreadPerPrim" title="Position threads per input primitive" description="The number of position shader invocations per input primitive." units="threads" equation="((IDVS_POS_SHAD_REQ) * (4)) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))" />
|
||||
<event name="MaliGeomSampleCullPrim" title="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" counter="PRIM_SAT_CULLED" offset="14" />
|
||||
<event name="MaliGeomSampleCullRate" title="Sample test cull percentage" description="The percentage of primitives culled by the sample coverage test." units="percent" equation="((PRIM_SAT_CULLED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED) - (PRIM_CLIPPED))) * 100" />
|
||||
<event name="MaliGeomTotalCullPrim" title="Culled primitives" description="The number of primitives that were culled during the rendering process." units="primitives" equation="(PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)" />
|
||||
<event name="MaliGeomTotalPrim" title="Total input primitives" description="The total number of input primitives to the rendering process." units="primitives" equation="((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomTrianglePrim" title="Triangle primitives" description="The number of input triangle primitives." units="primitives" counter="TRIANGLES" offset="6" />
|
||||
<event name="MaliGeomVarShadTask" title="Tiler varying shading requests" description="The number of varying shading requests in the tiler geometry flow." units="requests" counter="IDVS_VAR_SHAD_REQ" offset="37" />
|
||||
<event name="MaliGeomVarShadThread" title="Varying shader thread invocations" description="The number of varying shader thread invocations." units="threads" equation="(IDVS_VAR_SHAD_REQ) * (4)" />
|
||||
<event name="MaliGeomVarShadThreadPerPrim" title="Varying threads per input primitive" description="The number of varying shader invocations per visible primitive." units="threads" equation="((IDVS_VAR_SHAD_REQ) * (4)) / (PRIM_VISIBLE)" />
|
||||
<event name="MaliGeomVisiblePrim" title="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" counter="PRIM_VISIBLE" offset="11" />
|
||||
<event name="MaliGeomVisibleRate" title="Visible primitive percentage" description="The percentage of primitives that are visible after culling." units="percent" equation="((PRIM_VISIBLE) / (((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE))) * 100" />
|
||||
<event name="MaliGeomZPlaneCullPrim" title="Z plane culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" counter="PRIM_CLIPPED" offset="13" />
|
||||
<event name="MaliGeomZPlaneCullRate" title="Z plane test cull percentage" description="The percentage of primitives culled by the frustum Z plane test." units="percent" equation="((PRIM_CLIPPED) / ((((PRIM_CULLED) + (PRIM_CLIPPED) + (PRIM_SAT_CULLED)) + (PRIM_VISIBLE)) - (PRIM_CULLED))) * 100" />
|
||||
<event name="MaliTilerActiveCy" title="Tiler active cycles" description="The number of cycles when the tiler has a workload queued for processing." units="cycles" counter="TILER_ACTIVE" offset="4" />
|
||||
<event name="MaliTilerPosCacheHit" title="Position cache hit requests" description="The number of position lookups that result in a hit in the vertex cache." units="requests" counter="VCACHE_HIT" offset="26" />
|
||||
<event name="MaliTilerPosCacheHitRate" title="Position cache hit percentage" description="The percentage hit rate of the tiler position cache." units="percent" equation="((VCACHE_HIT) / ((VCACHE_HIT) + (VCACHE_MISS))) * 100" />
|
||||
<event name="MaliTilerPosCacheMiss" title="Position cache miss requests" description="The number of position lookups that miss in the vertex cache." units="requests" counter="VCACHE_MISS" offset="27" />
|
||||
<event name="MaliTilerPosShadFIFOFullCy" title="Tiler position FIFO full cycles" description="The number of cycles when the tiler has a stalled position shading buffer." units="cycles" counter="IDVS_POS_FIFO_FULL" offset="24" />
|
||||
<event name="MaliTilerPosShadStallCy" title="Tiler position shading stall cycles" description="The number of cycles when the tiler has a stalled position shading request." units="cycles" counter="IDVS_POS_SHAD_STALL" offset="23" />
|
||||
<event name="MaliTilerRdBt" title="Output internal read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" counter="BUS_READ" offset="17" />
|
||||
<event name="MaliTilerUtil" title="Tiler utilization" description="The percentage of GPU active cycles when the tiler has a workload queued for processing." units="percent" equation="((TILER_ACTIVE) / (GPU_ACTIVE)) * 100" />
|
||||
<event name="MaliTilerVarCacheHit" title="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" counter="IDVS_VBU_HIT" offset="34" />
|
||||
<event name="MaliTilerVarCacheHitRate" title="Varying cache hit percentage" description="The percentage hit rate of the tiler varying cache." units="percent" equation="((IDVS_VBU_HIT) / ((IDVS_VBU_HIT) + (IDVS_VBU_MISS))) * 100" />
|
||||
<event name="MaliTilerVarCacheMiss" title="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" counter="IDVS_VBU_MISS" offset="35" />
|
||||
<event name="MaliTilerVarShadStallCy" title="Tiler varying shading stall cycles" description="The number of cycles when the tiler has a stalled varying shading request." units="cycles" counter="IDVS_VAR_SHAD_STALL" offset="38" />
|
||||
</category>
|
||||
</metrics>
|
||||
|
|
@ -3,8 +3,20 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
|
||||
pan_hw_metrics = [
|
||||
'G31', 'G51', 'G52', 'G57', 'G68', 'G71', 'G72', 'G76', 'G77',
|
||||
'G78', 'T72x', 'T76x', 'T82x', 'T83x', 'T86x', 'T88x',
|
||||
'T72x', 'T76x', 'T82x', 'T83x', 'T86x', 'T88x',
|
||||
'generated/G31',
|
||||
'generated/G51',
|
||||
'generated/G52',
|
||||
'generated/G71',
|
||||
'generated/G72',
|
||||
'generated/G76',
|
||||
'generated/G77',
|
||||
'generated/G78',
|
||||
'generated/G710',
|
||||
'generated/G715',
|
||||
'generated/G720',
|
||||
'generated/G725',
|
||||
'generated/G1',
|
||||
]
|
||||
|
||||
pan_hw_metrics_xml_files = []
|
||||
|
|
|
|||
|
|
@ -4,9 +4,15 @@
|
|||
import argparse
|
||||
import textwrap
|
||||
import os
|
||||
import datetime
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import ClassVar
|
||||
|
||||
import xml.etree.ElementTree as et
|
||||
|
||||
TAB_SIZE = 3
|
||||
|
||||
|
||||
class SourceFile:
|
||||
def __init__(self, filename):
|
||||
|
|
@ -14,7 +20,7 @@ class SourceFile:
|
|||
self._indent = 0
|
||||
|
||||
def write(self, *args):
|
||||
code = ' '.join(map(str,args))
|
||||
code = ' '.join(map(str, args))
|
||||
for line in code.splitlines():
|
||||
text = ''.rjust(self._indent) + line
|
||||
self.file.write(text.rstrip() + "\n")
|
||||
|
|
@ -26,6 +32,17 @@ class SourceFile:
|
|||
self._indent -= n
|
||||
|
||||
|
||||
# Maps a category name found in the metrics XML files to the name of the
# pan_perf_counter_categories enumerator used for it in the generated C code.
# Several XML category names may share one enumerator.
CATEGORY_IDX_REMAP = {
    xml_name: cat_enum
    for cat_enum, xml_names in (
        ("PAN_PERF_COUNTER_CAT_FRONTEND", ("GPU Front-end", "Job Manager", "CSF")),
        ("PAN_PERF_COUNTER_CAT_TILER", ("Tiler",)),
        ("PAN_PERF_COUNTER_CAT_MEMSYS", ("Memory System", "L2 Cache")),
        ("PAN_PERF_COUNTER_CAT_SHADER", ("Shader Core",)),
    )
    for xml_name in xml_names
}
|
||||
|
||||
|
||||
class Counter:
|
||||
# category Category owning the counter
|
||||
# xml XML representation of itself
|
||||
|
|
@ -35,12 +52,18 @@ class Counter:
|
|||
self.name = self.xml.get("name")
|
||||
self.desc = self.xml.get("description")
|
||||
self.units = self.xml.get("units")
|
||||
self.offset = int(self.xml.get("offset"))
|
||||
self.underscore_name = self.xml.get("counter").lower()
|
||||
self.equation = self.xml.get("equation")
|
||||
self.offset = int(self.xml.get("offset") or 0)
|
||||
self.underscore_name = (self.xml.get("counter") or "").lower()
|
||||
self.source_name = self.xml.get("counter") or ""
|
||||
self.equation_impl = None
|
||||
|
||||
if self.units.endswith("/second"):
|
||||
self.units = self.units.replace("/second", "_per_second")
|
||||
|
||||
|
||||
class Category:
|
||||
# product Product owning the gategory
|
||||
# product Product owning the category
|
||||
# xml XML representation of itself
|
||||
def __init__(self, product, xml):
|
||||
self.product = product
|
||||
|
|
@ -60,6 +83,7 @@ class Product:
|
|||
self.filename = filename
|
||||
self.xml = et.parse(self.filename)
|
||||
self.name = self.xml.getroot().get('id')
|
||||
assert(self.name is not None)
|
||||
self.id = self.name.lower()
|
||||
self.categories = []
|
||||
|
||||
|
|
@ -67,6 +91,114 @@ class Product:
|
|||
self.categories.append(Category(self, xml_cat))
|
||||
|
||||
|
||||
@dataclass
class EquationImpl:
    """C implementation generated for the equation of one derived counter.

    Implementations are cached in `impls`, first by counter name and then by
    generated body, so that an identical body is only emitted once per counter
    name. Distinct bodies for the same counter name are told apart by
    `version`, which becomes part of the emitted function name.
    """

    # Base name of the generated C function: "compute_<lower-cased counter name>".
    fname: str
    # C statements making up the function body.
    body: str
    # Counter the implementation was first generated for. The annotation is
    # quoted so that it is not evaluated when the class is created.
    counter: 'Counter'
    # Position of this body among the distinct bodies generated for the same
    # counter name; -1 until get() assigns it.
    version: int = -1

    # counter name -> generated body -> implementation. This is a class
    # attribute, so it is shared by every product processed in one run.
    impls: ClassVar[dict[str, dict[str, 'EquationImpl']]] = {}

    @classmethod
    def get(cls, counter, all_counters):
        """Return the implementation for `counter`, creating it if needed.

        We don't want duplicate methods wasting space, this makes sure there is
        only one implementation for each variant of counter hardware locations.

        `all_counters` is the list of counters whose source names may be
        referenced by the equation.
        """
        body = cls.generate_body(counter, all_counters)
        bucket = cls.impls.setdefault(counter.name, {})

        if body not in bucket:
            fname = f"compute_{counter.name.lower()}"
            # len(bucket) is evaluated before the insertion, so versions
            # count up from 0 in creation order.
            bucket[body] = EquationImpl(fname, body, counter,
                                        version=len(bucket))

        return bucket[body]

    @staticmethod
    def generate_body(counter, counters):
        """Translate `counter.equation` into the body of a C function.

        Every counter source name and every MALI_CONFIG_* name used by the
        equation is loaded into a local `const double vN`, and the equation is
        rewritten in terms of those locals. Returns the C statements as one
        newline-separated string ending in the `return` statement.
        """
        eq = counter.equation

        # name -> (variable index, C definition); insertion order is the
        # order in which the definitions are emitted.
        vals = dict()

        # Longest source names first, so that a name which is a prefix of
        # another one does not get substituted inside the longer name.
        for c in sorted(counters, key=lambda x: len(x.source_name), reverse=True):
            if c.source_name == "" or c.source_name not in eq:
                continue

            idx = len(vals)
            cat_enum = CATEGORY_IDX_REMAP[c.category.name]
            # MaliAnyUtil for example is from "Shader Core" but it reads GPU_ACTIVE
            # which is from "Front-end". We can not use the block index from the
            # shader core when reading a front-end counter.
            # If reading from another block for the equation the only block index that
            # makes sense is 0 because if the category had more than one block we
            # could not know which one to choose.
            from_block = 'block' if c.category.name == counter.category.name else '0'
            r = f"const double v{idx} = pan_perf_counter_read_raw(perf, {cat_enum}, {c.offset}, {from_block});"
            vals[c.source_name] = (idx, r)

            eq = eq.replace(c.source_name, f"v{idx}")

        def config_var(match):
            # Allocate a variable the first time a config name is seen and
            # reuse it for every later occurrence. Allocating a new index per
            # occurrence would leave the equation referring to a variable
            # whose definition has been overwritten.
            config = match.group(0)
            if config not in vals:
                idx = len(vals)
                pan_config = config.replace("MALI", "PAN_PERF_DERIVED")
                vals[config] = (idx, f"const double v{idx} = configs[{pan_config}];")
            return f"v{vals[config][0]}"

        # The pattern is greedy, so it always consumes a complete identifier;
        # a config name that is a prefix of another one is left intact.
        eq = re.sub(r"MALI_CONFIG[a-zA-Z0-9_]+", config_var, eq)

        defs = [r for _, r in vals.values()]
        body = "\n".join(defs) + "\n"
        body += f"return {eq};"

        return body

    @property
    def versioned_name(self):
        """Name of the generated C function, including the version suffix."""
        assert self.version != -1, "should not emit non versioned"
        return self.fname + f"_v{self.version}"

    @property
    def decl(self):
        """C declarator of the generated function, without storage class."""
        decl = "double " + self.versioned_name + \
            "(const struct pan_perf *perf, const double *configs, uint8_t block)"
        return decl
|
||||
|
||||
|
||||
def generate_equations(prods, c):
    """Write one static C function per distinct derived-counter equation.

    For every product in `prods`, each counter that has an equation gets its
    `equation_impl` attribute set. Once all products have been processed, every
    implementation collected in EquationImpl.impls is written to `c`.
    """
    for prod in prods:
        counters = [counter
                    for cat in prod.categories
                    for counter in cat.counters]

        # Only counters that have a source name can be read by an equation.
        raw_counters = [counter for counter in counters if counter.source_name]

        for counter in counters:
            if counter.equation:
                counter.equation_impl = EquationImpl.get(counter, raw_counters)

    for bucket in EquationImpl.impls.values():
        for impl in bucket.values():
            c.write("static " + impl.decl + "{")
            c.indent(TAB_SIZE)
            c.write(impl.body)
            c.outdent(TAB_SIZE)
            c.write("}\n")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--header", help="Header file to write", required=True)
|
||||
|
|
@ -82,17 +214,17 @@ def main():
|
|||
for xml_file in args.xml_files:
|
||||
prods.append(Product(xml_file))
|
||||
|
||||
tab_size = 3
|
||||
tab_size = TAB_SIZE
|
||||
|
||||
copyright = textwrap.dedent("""\
|
||||
/* Autogenerated file, DO NOT EDIT manually! generated by {}
|
||||
*
|
||||
* Copyright © 2021 Arm Limited
|
||||
* Copyright © 2021 Collabora Ltd.
|
||||
* Copyright © {year} Arm Limited
|
||||
* Copyright © {year} Collabora Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
""").format(os.path.basename(__file__))
|
||||
""").format(os.path.basename(__file__), year=datetime.datetime.now().year)
|
||||
|
||||
h.write(copyright)
|
||||
h.write(textwrap.dedent("""\
|
||||
|
|
@ -110,6 +242,79 @@ def main():
|
|||
#include <util/macros.h>
|
||||
"""))
|
||||
|
||||
# Helpers available to the generated equation functions. The equations
# operate on double values, so the helpers take and return double: with int
# parameters any operand above INT_MAX would be converted out of range and
# fractional operands would be truncated.
# min()/max() dispatch on the argument count (2 to 9) through GET_MACRO.
c.write(textwrap.dedent("""
    static inline double max2(double a, double b) {
       return MAX2(a, b);
    }

    static inline double max3(double a, double b, double c) {
       return max2(max2(a, b), c);
    }

    static inline double max4(double a, double b, double c, double d) {
       return max2(max3(a, b, c), d);
    }

    static inline double max5(double a, double b, double c, double d, double e) {
       return max2(max4(a, b, c, d), e);
    }

    static inline double max6(double a, double b, double c, double d, double e, double f) {
       return max2(max5(a, b, c, d, e), f);
    }

    static inline double max7(double a, double b, double c, double d, double e, double f, double g) {
       return max2(max6(a, b, c, d, e, f), g);
    }

    static inline double max8(double a, double b, double c, double d, double e, double f, double g, double h) {
       return max2(max7(a, b, c, d, e, f, g), h);
    }

    static inline double max9(double a, double b, double c, double d, double e, double f, double g, double h, double i) {
       return max2(max8(a, b, c, d, e, f, g, h), i);
    }

    static inline double min2(double a, double b) {
       return MIN2(a, b);
    }

    static inline double min3(double a, double b, double c) {
       return min2(min2(a, b), c);
    }

    static inline double min4(double a, double b, double c, double d) {
       return min2(min3(a, b, c), d);
    }

    static inline double min5(double a, double b, double c, double d, double e) {
       return min2(min4(a, b, c, d), e);
    }

    static inline double min6(double a, double b, double c, double d, double e, double f) {
       return min2(min5(a, b, c, d, e), f);
    }

    static inline double min7(double a, double b, double c, double d, double e, double f, double g) {
       return min2(min6(a, b, c, d, e, f), g);
    }

    static inline double min8(double a, double b, double c, double d, double e, double f, double g, double h) {
       return min2(min7(a, b, c, d, e, f, g), h);
    }

    static inline double min9(double a, double b, double c, double d, double e, double f, double g, double h, double i) {
       return min2(min8(a, b, c, d, e, f, g, h), i);
    }

    #define GET_MACRO(_1,_2,_3,_4,_5,_6,_7,_8,_9,name,...) name
    #define min(...) GET_MACRO(__VA_ARGS__, min9, min8, min7, min6, min5, min4, min3, min2)(__VA_ARGS__)
    #define max(...) GET_MACRO(__VA_ARGS__, max9, max8, max7, max6, max5, max4, max3, max2)(__VA_ARGS__)

    """))
|
||||
|
||||
generate_equations(prods, c)
|
||||
|
||||
for prod in prods:
|
||||
c.write(textwrap.dedent("""
|
||||
static void UNUSED
|
||||
|
|
@ -119,7 +324,7 @@ def main():
|
|||
c.indent(tab_size)
|
||||
|
||||
n_categories = len(prod.categories)
|
||||
c.write("STATIC_ASSERT(%u <= PAN_PERF_MAX_CATEGORIES);" % n_categories)
|
||||
c.write("STATIC_ASSERT(%u <= PAN_PERF_COUNTER_CAT_MAX);" % n_categories)
|
||||
n_counters = 0
|
||||
for category in prod.categories:
|
||||
category_counters_count = len(category.counters)
|
||||
|
|
@ -145,7 +350,7 @@ def main():
|
|||
for i in range(0, len(prod.categories)):
|
||||
category = prod.categories[i]
|
||||
|
||||
c.write("{")
|
||||
c.write("[%s] = {" % CATEGORY_IDX_REMAP[category.name])
|
||||
c.indent(tab_size)
|
||||
c.write(".name = \"%s\"," % (category.name))
|
||||
c.write(".n_counters = %u," % (len(category.counters)))
|
||||
|
|
@ -164,7 +369,11 @@ def main():
|
|||
c.write(".symbol_name = \"%s\"," % (counter.underscore_name))
|
||||
c.write(".units = PAN_PERF_COUNTER_UNITS_%s," % (counter.units.upper()))
|
||||
c.write(".offset = %u," % (counter.offset))
|
||||
c.write(".category_index = %u," % i)
|
||||
c.write(".category = %s," % CATEGORY_IDX_REMAP[category.name])
|
||||
if counter.equation:
|
||||
c.write(f".derived = {counter.equation_impl.versioned_name},")
|
||||
else:
|
||||
c.write(".derived = NULL,")
|
||||
|
||||
c.outdent(tab_size)
|
||||
c.write("}, // counter")
|
||||
|
|
@ -188,7 +397,7 @@ def main():
|
|||
c.write("\nconst struct pan_perf_config * pan_perf_configs[] = {")
|
||||
c.indent(tab_size)
|
||||
for prod in prods:
|
||||
c.write("&pan_perf_config_%s," % prod.id)
|
||||
c.write("&pan_perf_config_%s," % prod.id)
|
||||
c.outdent(tab_size)
|
||||
c.write("};")
|
||||
|
||||
|
|
|
|||
286
src/panfrost/perf/pan_gen_perf_defs.py
Normal file
286
src/panfrost/perf/pan_gen_perf_defs.py
Normal file
|
|
@ -0,0 +1,286 @@
|
|||
# Copyright (c) 2026 Arm Ltd.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass
|
||||
import datetime
|
||||
import subprocess
|
||||
import xml.etree.ElementTree as et
|
||||
import re
|
||||
|
||||
COUNTERINFO_PATH = "./specification/database/counterinfo"
|
||||
HARDWARE_LAYOUT_PATH = "./specification/database/hardwarelayout"
|
||||
|
||||
HW_LAYOUT_LUT: dict[str, "HardwareLayout"] = {}
|
||||
|
||||
OUTPUT_COPYRIGHT = """<!--
|
||||
Copyright (c) {year} Arm, Ltd.
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
Generated from libGPUCounters @ {rev}.
|
||||
https://github.com/ARM-software/libGPUCounters
|
||||
which is:
|
||||
Copyright (c) 2023-2025 Arm Limited
|
||||
SPDX-License-Identifier: MIT
|
||||
-->
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def get_revision(path):
    """Return the output of `git rev-parse HEAD` run in the directory `path`.

    `path` must provide `as_posix()` (e.g. a pathlib.Path).

    Returns None when the revision cannot be determined: git exits with a
    non-zero status, or the command cannot be started at all (for example git
    is not installed or `path` is not an accessible directory).
    """
    cmd = ["git", "rev-parse", "HEAD"]
    try:
        res = subprocess.run(cmd, capture_output=True, cwd=path.as_posix())
    except OSError:
        # Raised before git runs; report it the same way as a git failure so
        # that callers only have to check for None.
        return None

    if res.returncode != 0:
        return None
    return res.stdout.decode().strip()
|
||||
|
||||
|
||||
def map_nn(v, f):
    """Apply `f` to `v`, except that a `v` of None is passed through as None."""
    if v is None:
        return None
    return f(v)
|
||||
|
||||
|
||||
def get_elem_text(xml, name):
    """Return the text of the first sub-element of `xml` found for `name`.

    Returns None when there is no such sub-element. The text itself is
    returned as-is, so it is also None for an element without text.
    """
    child = xml.find(name)
    return None if child is None else child.text
|
||||
|
||||
|
||||
# Location of a hardware counter inside a hardware layout description.
# Instances are immutable.
@dataclass(frozen=True)
class CounterHwLocation:
    # Type of the counter block holding the counter.
    block: str
    # Index of the counter.
    # NOTE(review): presumably relative to the start of its block - confirm.
    counter_index: int
|
||||
|
||||
|
||||
@dataclass
class HardwareLayout:
    """Hardware counter locations of one GPU, read from a layout XML element."""

    gpu_name: str
    # map source name to (block index, counter index)
    locations: dict[str, CounterHwLocation]

    @staticmethod
    def from_xml(xml: et.Element) -> "HardwareLayout":
        """Build a layout from an element carrying a "gpu" attribute.

        Every "Counter" element below a "CounterBlock" child contributes one
        entry, keyed by the counter's "name" attribute and recording the
        block's "type" attribute and the counter's "index" attribute. When a
        name occurs more than once the last occurrence wins.
        """
        gpu = xml.get("gpu")
        assert gpu is not None

        found = {}
        for block_elem in xml.findall("CounterBlock"):
            block_type = block_elem.get("type")
            assert block_type is not None

            for counter_elem in block_elem.findall("Counter"):
                index = counter_elem.get("index")
                assert index is not None
                found[counter_elem.get("name")] = CounterHwLocation(
                    block_type, int(index))

        return HardwareLayout(gpu_name=gpu, locations=found)
|
||||
|
||||
|
||||
def parse_hw_layout(path: Path):
    """Parse the XML file at `path` and return its root as a HardwareLayout."""
    root = et.parse(path).getroot()
    return HardwareLayout.from_xml(root)
|
||||
|
||||
|
||||
def parse_supported_gpus(xml):
    """Return the text of each "GPU" element in the "SupportedGPUs" child of `xml`.

    `xml` is expected to have a "SupportedGPUs" child.
    """
    gpus = []
    for gpu_elem in xml.find("SupportedGPUs").findall("GPU"):
        gpus.append(gpu_elem.text)
    return gpus
|
||||
|
||||
|
||||
def group_from_filename(fname):
    """Return the counter group whose key is contained in the file name `fname`.

    The keys are tried in the order they are listed below and the first one
    found as a substring of `fname` decides the group.

    Raises AssertionError, naming the file, when no key matches. The error is
    raised explicitly so that it carries a message and is not dropped when
    Python runs with assertions disabled.
    """
    # This maps to the values of the "type" field in the CounterBlock xml blocks.
    fname_to_dbkey = {
        "GPUFrontEnd": "GPU Front-end",
        "L2Cache": "Memory System",
        "Tiler": "Tiler",
        "ShaderCore": "Shader Core",
        "Constants": "Constants",
        "Content": "Content",
    }
    for name, key in fname_to_dbkey.items():
        if name in fname:
            return key
    raise AssertionError(f"could not find group from filename: {fname}")
|
||||
|
||||
|
||||
@dataclass
class CounterInfo:
    """One counter description read from a "CounterInfo" XML element."""

    machine_name: str
    supported_gpus: list[str]
    group: str
    equation: str = ""
    source_name: str = ""
    # Can be used as a fallback to find hw offsets if source_name isn't available.
    source_alias_name: str = ""
    human_name: str = ""
    short_desc: str = ""
    units: str = ""

    @staticmethod
    def from_xml(xml, group):
        """Create a CounterInfo from `xml`, assigning it to `group`.

        "MachineName" is mandatory. Every other text field falls back to the
        empty string when its element is missing or has no text.
        """
        def text_or_empty(tag):
            return get_elem_text(xml, tag) or ""

        machine_name = get_elem_text(xml, "MachineName")
        assert machine_name is not None
        supported = parse_supported_gpus(xml)

        # Collapse a multi-line description into one line, trimming each line.
        desc_lines = text_or_empty("ShortDescription").splitlines()
        description = " ".join(line.strip() for line in desc_lines).strip()

        return CounterInfo(
            machine_name=machine_name,
            supported_gpus=supported,
            group=group,
            equation=text_or_empty("Equation").strip(),
            source_name=text_or_empty("SourceName"),
            source_alias_name=text_or_empty("SourceAlias"),
            human_name=text_or_empty("HumanName"),
            short_desc=description,
            units=text_or_empty("Units").strip(),
        )

    def is_derived(self):
        """True when the counter has no source name."""
        return not self.source_name

    def get_hw_offsets(self, gpu: str) -> CounterHwLocation:
        """Look up this counter's location in the hardware layout of `gpu`.

        Only valid for counters with a source name, and only for a GPU listed
        in `supported_gpus`.
        """
        assert self.source_name != ""
        assert gpu in self.supported_gpus

        locations = HW_LAYOUT_LUT[gpu].locations
        if self.source_name not in locations:
            # If the normal source name doesn't work try the alias
            # Needed for example for RT_RAY_BOX_ISSUED on G1 which is using the
            # alias RT_BOX_ISSUE_CYCLES there.
            assert self.source_alias_name != ""
            return locations[self.source_alias_name]

        return locations[self.source_name]

    def is_supported(self):
        """False for counters whose equation uses MALI_CONFIG_TIME_SPAN."""
        return "MALI_CONFIG_TIME_SPAN" not in self.equation
|
||||
|
||||
|
||||
# Pairs a product id with a database key.
# NOTE(review): not referenced by the visible part of this script - confirm
# whether it is still needed.
@dataclass
class ProductInfo:
    product_id: str
    database_key: str
|
||||
|
||||
|
||||
def parse_counters(path: Path):
    """Parse every "CounterInfo" element of the XML file at `path`.

    The group of all returned counters is derived from the file name.
    """
    group = group_from_filename(path.name)
    tree = et.parse(path)

    infos = []
    for elem in tree.findall("CounterInfo"):
        infos.append(CounterInfo.from_xml(elem, group))
    return infos
|
||||
|
||||
|
||||
def resolve_equation(eq: str, counters_gpu: "list[CounterInfo]"):
    """Expand the counter machine names used in `eq`.

    A derived counter is replaced by its own equation and any other counter
    by its source name, each wrapped in parentheses. Substitution is repeated
    until the equation no longer contains a known machine name, so that names
    introduced by an expansion are expanded as well.

    Names are only replaced where they form a complete identifier, e.g. in
        eq = MaliMainQueueTask * MaliMainQueueTaskSize * MaliMainQueueTaskSize
    `MaliMainQueueTask` is never substituted inside `MaliMainQueueTaskSize`,
    whether or not the longer name is a known counter.

    `counters_gpu` lists the counters available to the equation. When a
    machine name occurs more than once, the first entry is used. An equation
    is returned unchanged when no known name occurs in it.
    """
    replacements = {}
    for c in counters_gpu:
        # An empty name cannot be referenced by an equation.
        if not c.machine_name or c.machine_name in replacements:
            continue
        expansion = c.equation if c.is_derived() else c.source_name
        replacements[c.machine_name] = f"({expansion})"

    if not replacements:
        return eq

    # Longest alternatives first; the look-arounds reject matches that are
    # only a part of a longer identifier.
    names = sorted(replacements, key=len, reverse=True)
    token = re.compile(
        r"(?<![A-Za-z0-9_])(?:"
        + "|".join(re.escape(name) for name in names)
        + r")(?![A-Za-z0-9_])"
    )

    while True:
        expanded = token.sub(lambda m: replacements[m.group(0)], eq)
        if expanded == eq:
            return eq
        eq = expanded
|
||||
|
||||
|
||||
def counter_list_to_xml(counters: list[CounterInfo], gpu: str):
    """Build the "metrics" XML tree describing `counters` for `gpu`.

    The root id is the GPU name without its "Mali-"/"Mali" prefix. Each
    counter group becomes a "category" element, in sorted order, and each
    supported counter an "event" element in it, sorted by machine name.
    Derived counters carry their resolved equation, the others their source
    name and hardware counter index. The groups in IGNORE_CATS are left out.
    """
    IGNORE_CATS = {"Constants", "Content"}

    gpu_id = gpu.replace("Mali-", "").replace("Mali", "").strip()
    root = et.Element("metrics", attrib={"id": gpu_id})

    groups = {info.group for info in counters} - IGNORE_CATS
    categories = {
        group: et.SubElement(root, "category", attrib={"name": group})
        for group in sorted(groups)
    }

    for counter in sorted(counters, key=lambda c: c.machine_name):
        if counter.group in IGNORE_CATS or not counter.is_supported():
            continue
        parent = categories[counter.group]

        if counter.is_derived():
            source = {
                "equation": resolve_equation(counter.equation, counters),
            }
        else:
            source = {
                "counter": counter.source_name,
                "offset": str(counter.get_hw_offsets(gpu).counter_index),
            }

        et.SubElement(parent, "event", {
            "name": counter.machine_name,
            "title": counter.human_name,
            "description": counter.short_desc,
            "units": counter.units,
            **source,
        })

    return root
|
||||
|
||||
|
||||
def main():
    """Generate one metrics XML file per GPU from a libGPUCounters checkout.

    Reads every hardware layout and counter description XML file of the
    checkout, then writes `<gpu>.xml` for each GPU named by a counter into
    the output directory. Each file starts with a header naming the checkout's
    git revision.
    """
    p = ArgumentParser()
    p.add_argument("lib_gpu_counters", type=Path,
                   help="Path to libGPUCounter source")
    p.add_argument(
        "--output-path", type=Path, default=Path(__file__).parent / "generated"
    )
    args = p.parse_args()

    for layout_file in (args.lib_gpu_counters / HARDWARE_LAYOUT_PATH).glob("*.xml"):
        layout = parse_hw_layout(layout_file)
        HW_LAYOUT_LUT[layout.gpu_name] = layout

    counters: list[CounterInfo] = []
    for info_file in (args.lib_gpu_counters / COUNTERINFO_PATH).glob("*.xml"):
        counters += parse_counters(info_file)

    args.output_path.mkdir(exist_ok=True)

    all_gpus = set().union(*(c.supported_gpus for c in counters))
    if not all_gpus:
        return

    # The header is the same for every output file: query git once, and fail
    # before anything has been written if the revision is unknown.
    rev = get_revision(args.lib_gpu_counters)
    assert rev is not None, "could not determine the libGPUCounters revision"
    header = OUTPUT_COPYRIGHT.format(
        year=datetime.datetime.now().year, rev=rev).encode(encoding="utf-8")

    # Generate one file for each GPU.
    for gpu in sorted(all_gpus):
        gpu_counters = [c for c in counters if gpu in c.supported_gpus]
        xml = counter_list_to_xml(gpu_counters, gpu)
        et.indent(xml)

        fname = gpu.replace("Mali-", "").replace("Mali", "").strip() + ".xml"

        with open(args.output_path / fname, "wb") as out:
            out.write(header)
            out.write(et.tostring(xml, encoding="utf-8"))
            out.write("\n".encode(encoding="utf-8"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2021 Collabora, Ltd.
|
||||
* Copyright © 2026 Arm Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
|
|
@ -8,7 +9,6 @@
|
|||
#include <xf86drm.h>
|
||||
|
||||
#include "util/macros.h"
|
||||
#include "util/ralloc.h"
|
||||
|
||||
#include "pan_perf.h"
|
||||
|
||||
|
|
@ -17,23 +17,29 @@
|
|||
#include <lib/pan_props.h>
|
||||
#include <pan_perf_metrics.h>
|
||||
|
||||
#define PAN_COUNTERS_PER_CATEGORY 64
|
||||
#define PAN_SHADER_CORE_INDEX 3
|
||||
|
||||
uint32_t
|
||||
pan_perf_counter_read(const struct pan_perf_counter *counter,
|
||||
const struct pan_perf *perf)
|
||||
int64_t
|
||||
pan_perf_counter_read(const struct pan_perf *perf,
|
||||
const struct pan_perf_counter *counter, uint8_t block)
|
||||
{
|
||||
unsigned offset = perf->category_offset[counter->category_index];
|
||||
offset += counter->offset;
|
||||
assert(offset < perf->n_counter_values);
|
||||
if (counter->derived != NULL)
|
||||
return counter->derived(perf, perf->derived_configs, block);
|
||||
else
|
||||
return pan_perf_counter_read_raw(perf, counter->category, counter->offset, block);
|
||||
}
|
||||
|
||||
uint32_t ret = perf->counter_values[offset];
|
||||
int64_t
|
||||
pan_perf_counter_read_block_sum(const struct pan_perf_counter *counter,
|
||||
const struct pan_perf *perf)
|
||||
{
|
||||
int64_t ret = pan_perf_counter_read(perf, counter, 0);
|
||||
|
||||
// If counter belongs to shader core, accumulate values for all other cores
|
||||
if (counter->category_index == PAN_SHADER_CORE_INDEX) {
|
||||
for (uint32_t core = 1; core < perf->core_id_range; ++core) {
|
||||
ret += perf->counter_values[offset + PAN_COUNTERS_PER_CATEGORY * core];
|
||||
/* If counter belongs to shader core, sum values for all cores. */
|
||||
if (counter->category == PAN_PERF_COUNTER_CAT_SHADER) {
|
||||
uint32_t n_cores =
|
||||
perf->mem_layout.category[PAN_PERF_COUNTER_CAT_SHADER].n_blocks;
|
||||
for (uint32_t core = 1; core < n_cores; ++core) {
|
||||
ret += pan_perf_counter_read(perf, counter, core);
|
||||
assert(ret >= 0 && "counter sum should not overflow");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -54,16 +60,12 @@ pan_lookup_counters(const char *name)
|
|||
void
|
||||
pan_perf_init(struct pan_perf *perf, int fd)
|
||||
{
|
||||
ASSERTED drmVersionPtr version = drmGetVersion(fd);
|
||||
|
||||
/* We only support panfrost at the moment. */
|
||||
assert(version && !strcmp(version->name, "panfrost"));
|
||||
|
||||
drmFreeVersion(version);
|
||||
|
||||
perf->dev = pan_kmod_dev_create(fd, 0, NULL);
|
||||
assert(perf->dev);
|
||||
|
||||
perf->session = pan_kmod_perf_create(perf->dev);
|
||||
assert(perf->session);
|
||||
|
||||
struct pan_kmod_dev_props props = perf->dev->props;
|
||||
|
||||
const struct pan_model *model =
|
||||
|
|
@ -76,49 +78,59 @@ pan_perf_init(struct pan_perf *perf, int fd)
|
|||
if (perf->cfg == NULL)
|
||||
UNREACHABLE("Performance counters missing!");
|
||||
|
||||
// Generally counter blocks are laid out in the following order:
|
||||
// Job manager, tiler, one or more L2 caches, and one or more shader cores.
|
||||
unsigned l2_slices = pan_query_l2_slices(&props);
|
||||
pan_query_core_count(&props, &perf->core_id_range);
|
||||
pan_kmod_perf_query_layout(perf->session, &perf->mem_layout);
|
||||
|
||||
uint32_t n_blocks = 2 + l2_slices + perf->core_id_range;
|
||||
perf->n_counter_values = PAN_COUNTERS_PER_CATEGORY * n_blocks;
|
||||
perf->counter_values = ralloc_array(perf, uint32_t, perf->n_counter_values);
|
||||
unsigned unused;
|
||||
|
||||
/* Setup the layout */
|
||||
perf->category_offset[0] = PAN_COUNTERS_PER_CATEGORY * 0;
|
||||
perf->category_offset[1] = PAN_COUNTERS_PER_CATEGORY * 1;
|
||||
perf->category_offset[2] = PAN_COUNTERS_PER_CATEGORY * 2;
|
||||
perf->category_offset[3] = PAN_COUNTERS_PER_CATEGORY * (2 + l2_slices);
|
||||
}
|
||||
|
||||
static int
|
||||
pan_perf_query(struct pan_perf *perf, uint32_t enable)
|
||||
{
|
||||
struct drm_panfrost_perfcnt_enable perfcnt_enable = {enable, 0};
|
||||
return pan_kmod_ioctl(perf->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_ENABLE,
|
||||
&perfcnt_enable);
|
||||
perf->derived_configs[PAN_PERF_DERIVED_CONFIG_SHADER_CORE_COUNT] =
|
||||
pan_query_core_count(&props, &unused);
|
||||
perf->derived_configs[PAN_PERF_DERIVED_CONFIG_L2_CACHE_COUNT] =
|
||||
pan_query_l2_slices(&props);
|
||||
perf->derived_configs[PAN_PERF_DERIVED_CONFIG_EXT_BUS_BYTE_SIZE] =
|
||||
pan_query_bus_width(&props);
|
||||
}
|
||||
|
||||
int
|
||||
pan_perf_enable(struct pan_perf *perf)
|
||||
{
|
||||
return pan_perf_query(perf, 1 /* enable */);
|
||||
return pan_kmod_perf_enable(perf->session);
|
||||
}
|
||||
|
||||
int
|
||||
pan_perf_disable(struct pan_perf *perf)
|
||||
{
|
||||
return pan_perf_query(perf, 0 /* disable */);
|
||||
return pan_kmod_perf_disable(perf->session);
|
||||
}
|
||||
|
||||
/* Destroy the perf session held by perf.
 *
 * Only perf->session is released here. */
void
pan_perf_finish(struct pan_perf *perf)
{
   pan_kmod_perf_destroy(perf->session);
}
|
||||
|
||||
int
|
||||
pan_perf_dump(struct pan_perf *perf)
|
||||
{
|
||||
// Dump performance counter values to the memory buffer pointed to by
|
||||
// counter_values
|
||||
struct drm_panfrost_perfcnt_dump perfcnt_dump = {
|
||||
(uint64_t)(uintptr_t)perf->counter_values};
|
||||
return pan_kmod_ioctl(perf->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_DUMP,
|
||||
&perfcnt_dump);
|
||||
return pan_kmod_perf_dump(perf->session);
|
||||
}
|
||||
|
||||
/* Return the data_ts value stored in the perf session.
 *
 * NOTE(review): presumably the timestamp of the most recent dump; see
 * pan_perf_timestamp_supported() for whether it is meaningful - confirm. */
uint64_t
pan_perf_get_timestamp(const struct pan_perf *perf)
{
   return perf->session->data_ts;
}
|
||||
|
||||
/* Return the data_ts_supported flag of the perf session. */
bool
pan_perf_timestamp_supported(const struct pan_perf *perf)
{
   return perf->session->data_ts_supported;
}
|
||||
|
||||
uint64_t
|
||||
pan_perf_get_min_sampling_period(const struct pan_perf *perf)
|
||||
{
|
||||
if (pan_arch(perf->dev->props.gpu_id) < 10)
|
||||
return 1000000;
|
||||
else
|
||||
return 500000;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,18 +8,23 @@
|
|||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <lib/kmod/pan_kmod.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define PAN_PERF_MAX_CATEGORIES 4
|
||||
#define PAN_PERF_MAX_COUNTERS 64
|
||||
/* 128 hardware counters, but there can be more derived ones. */
|
||||
#define PAN_PERF_MAX_COUNTERS 190
|
||||
|
||||
struct pan_kmod_dev;
|
||||
struct pan_kmod_dev_props;
|
||||
struct pan_model;
|
||||
struct pan_perf_category;
|
||||
struct pan_perf;
|
||||
/* Counter categories known to the perf layer.
 *
 * The first four values are static-asserted in pan_perf_counter_read_raw()
 * to equal the matching PAN_KMOD_PERF_CAT_* values, and a category is used
 * directly as an index into pan_perf::mem_layout.category[].
 * PAN_PERF_COUNTER_CAT_MAX is the number of categories and sizes
 * pan_perf_config::categories[].
 */
enum pan_perf_counter_categories {
|
||||
PAN_PERF_COUNTER_CAT_FRONTEND,
|
||||
PAN_PERF_COUNTER_CAT_TILER,
|
||||
PAN_PERF_COUNTER_CAT_MEMSYS,
|
||||
PAN_PERF_COUNTER_CAT_SHADER,
|
||||
/* Must be last. */
|
||||
PAN_PERF_COUNTER_CAT_MAX,
|
||||
};
|
||||
|
||||
enum pan_perf_counter_units {
|
||||
PAN_PERF_COUNTER_UNITS_CYCLES,
|
||||
|
|
@ -37,8 +42,24 @@ enum pan_perf_counter_units {
|
|||
PAN_PERF_COUNTER_UNITS_BYTES,
|
||||
PAN_PERF_COUNTER_UNITS_PIXELS,
|
||||
PAN_PERF_COUNTER_UNITS_ISSUES,
|
||||
PAN_PERF_COUNTER_UNITS_INTERRUPTS,
|
||||
PAN_PERF_COUNTER_UNITS_PERCENT,
|
||||
PAN_PERF_COUNTER_UNITS_TESTS,
|
||||
PAN_PERF_COUNTER_UNITS_RAYS,
|
||||
PAN_PERF_COUNTER_UNITS_NODES,
|
||||
PAN_PERF_COUNTER_UNITS_BOXES,
|
||||
PAN_PERF_COUNTER_UNITS_BYTES_PER_SECOND,
|
||||
};
|
||||
|
||||
/* Identifiers of the per-device configuration values kept in
 * pan_perf::derived_configs[], which is sized
 * PAN_PERF_DERIVED_CONFIG_LAST + 1. Presumably these feed the derived
 * counter callbacks; confirm against their users.
 *
 * PAN_PERF_DERIVED_CONFIG_LAST aliases the final real entry, so it must be
 * updated whenever a new value is appended.
 */
enum pan_perf_derived_config {
|
||||
PAN_PERF_DERIVED_CONFIG_SHADER_CORE_COUNT,
|
||||
PAN_PERF_DERIVED_CONFIG_EXT_BUS_BYTE_SIZE,
|
||||
PAN_PERF_DERIVED_CONFIG_L2_CACHE_COUNT,
|
||||
PAN_PERF_DERIVED_CONFIG_LAST = PAN_PERF_DERIVED_CONFIG_L2_CACHE_COUNT,
|
||||
};
|
||||
|
||||
struct pan_perf;
|
||||
|
||||
struct pan_perf_counter {
|
||||
const char *name;
|
||||
const char *desc;
|
||||
|
|
@ -46,7 +67,10 @@ struct pan_perf_counter {
|
|||
enum pan_perf_counter_units units;
|
||||
// Offset of this counter's value within the category
|
||||
uint32_t offset;
|
||||
unsigned category_index;
|
||||
enum pan_perf_counter_categories category;
|
||||
|
||||
/* Optional, function to compute the derived counters value. */
|
||||
double (*derived)(const struct pan_perf*, const double*, uint8_t);
|
||||
};
|
||||
|
||||
struct pan_perf_category {
|
||||
|
|
@ -62,25 +86,45 @@ struct pan_perf_category {
|
|||
/* Named counter configuration: a set of counter categories plus the number
 * of entries in use (n_categories).
 *
 * NOTE(review): this span comes from a rendered diff, so `categories` is
 * listed twice. Presumably the PAN_PERF_MAX_CATEGORIES-sized array is the
 * removed line and the PAN_PERF_COUNTER_CAT_MAX-sized array is its
 * replacement; confirm against the commit.
 */
struct pan_perf_config {
|
||||
const char *name;
|
||||
|
||||
struct pan_perf_category categories[PAN_PERF_MAX_CATEGORIES];
|
||||
struct pan_perf_category categories[PAN_PERF_COUNTER_CAT_MAX];
|
||||
uint32_t n_categories;
|
||||
};
|
||||
|
||||
/* Per-device performance counter context.
 *
 * Visible users: `session` is what the enable/disable/dump/finish and
 * timestamp helpers operate on, `mem_layout` and `session->data` are used
 * by pan_perf_counter_read_raw() to locate a counter, and `derived_configs`
 * is indexed by enum pan_perf_derived_config.
 *
 * NOTE(review): this span comes from a rendered diff and lists members from
 * both the pre- and post-change struct without markers. Presumably
 * counter_values, n_counter_values and category_offset belong to the old
 * ioctl-based layout and session, mem_layout and derived_configs are the
 * additions; confirm against the commit.
 */
struct pan_perf {
|
||||
struct pan_kmod_dev *dev;
|
||||
unsigned core_id_range;
|
||||
struct pan_kmod_perf_session *session;
|
||||
const struct pan_perf_config *cfg;
|
||||
|
||||
// Memory where to dump counter values
|
||||
uint32_t *counter_values;
|
||||
uint32_t n_counter_values;
|
||||
|
||||
/* Offsets of categories */
|
||||
unsigned category_offset[PAN_PERF_MAX_CATEGORIES];
|
||||
struct pan_kmod_perf_buffer_layout mem_layout;
|
||||
double derived_configs[PAN_PERF_DERIVED_CONFIG_LAST + 1];
|
||||
};
|
||||
|
||||
uint32_t pan_perf_counter_read(const struct pan_perf_counter *counter,
|
||||
const struct pan_perf *perf);
|
||||
static inline
|
||||
int64_t pan_perf_counter_read_raw(const struct pan_perf *perf,
|
||||
enum pan_perf_counter_categories cat,
|
||||
uint8_t counter_index,
|
||||
uint8_t block)
|
||||
{
|
||||
STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_FRONTEND == (int)PAN_PERF_COUNTER_CAT_FRONTEND);
|
||||
STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_TILER == (int)PAN_PERF_COUNTER_CAT_TILER);
|
||||
STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_MEMSYS == (int)PAN_PERF_COUNTER_CAT_MEMSYS);
|
||||
STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_SHADER == (int)PAN_PERF_COUNTER_CAT_SHADER);
|
||||
|
||||
assert(perf->session->data != NULL);
|
||||
|
||||
const uint32_t offset = perf->mem_layout.category[cat].offset +
|
||||
perf->mem_layout.block_stride * block +
|
||||
perf->mem_layout.counter_stride * counter_index;
|
||||
|
||||
uint8_t *val_ptr = ((uint8_t *)perf->session->data) + offset;
|
||||
return pan_kmod_perf_load_counter(perf->session, val_ptr);
|
||||
}
|
||||
|
||||
int64_t pan_perf_counter_read(const struct pan_perf *perf,
|
||||
const struct pan_perf_counter *counter,
|
||||
uint8_t block);
|
||||
|
||||
int64_t pan_perf_counter_read_block_sum(const struct pan_perf_counter *counter,
|
||||
const struct pan_perf *perf);
|
||||
|
||||
void pan_perf_init(struct pan_perf *perf, int fd);
|
||||
|
||||
|
|
@ -88,8 +132,16 @@ int pan_perf_enable(struct pan_perf *perf);
|
|||
|
||||
int pan_perf_disable(struct pan_perf *perf);
|
||||
|
||||
void pan_perf_finish(struct pan_perf *perf);
|
||||
|
||||
int pan_perf_dump(struct pan_perf *perf);
|
||||
|
||||
uint64_t pan_perf_get_timestamp(const struct pan_perf *perf);
|
||||
|
||||
bool pan_perf_timestamp_supported(const struct pan_perf *perf);
|
||||
|
||||
uint64_t pan_perf_get_min_sampling_period(const struct pan_perf *perf);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -45,8 +45,8 @@ main(void)
|
|||
|
||||
for (unsigned j = 0; j < cat->n_counters; ++j) {
|
||||
const struct pan_perf_counter *ctr = &cat->counters[j];
|
||||
uint32_t val = pan_perf_counter_read(ctr, perf);
|
||||
printf("%s (%s): %u\n", ctr->name, ctr->symbol_name, val);
|
||||
int64_t val = pan_perf_counter_read_block_sum(ctr, perf);
|
||||
printf("%s (%s): %ld\n", ctr->name, ctr->symbol_name, val);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
|
|
|||
|
|
@ -21,6 +21,15 @@ data_sources {
|
|||
}
|
||||
}
|
||||
|
||||
data_sources {
|
||||
config {
|
||||
name: "gpu.counters.panthor"
|
||||
gpu_counter_config {
|
||||
counter_period_ns: 500000
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
data_sources {
|
||||
config {
|
||||
name: "gpu.counters.v3d"
|
||||
|
|
|
|||
|
|
@ -73,6 +73,15 @@ data_sources {
|
|||
}
|
||||
}
|
||||
|
||||
data_sources {
|
||||
config {
|
||||
name: "gpu.counters.panthor"
|
||||
gpu_counter_config {
|
||||
counter_period_ns: 500000
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
data_sources {
|
||||
config {
|
||||
name: "gpu.counters.v3d"
|
||||
|
|
|
|||
|
|
@ -40,6 +40,10 @@ class Counter
|
|||
Byte,
|
||||
Hertz,
|
||||
None,
|
||||
Primitive,
|
||||
Instruction,
|
||||
Pixel,
|
||||
Fragment
|
||||
};
|
||||
|
||||
using Value = std::variant<int64_t, double>;
|
||||
|
|
|
|||
|
|
@ -181,6 +181,18 @@ template <typename GpuCounterDescriptor> void add_descriptors(GpuCounterDescript
|
|||
case Counter::Units::None:
|
||||
units = GpuCounterDescriptor::NONE;
|
||||
break;
|
||||
case Counter::Units::Primitive:
|
||||
units = GpuCounterDescriptor::PRIMITIVE;
|
||||
break;
|
||||
case Counter::Units::Instruction:
|
||||
units = GpuCounterDescriptor::INSTRUCTION;
|
||||
break;
|
||||
case Counter::Units::Pixel:
|
||||
units = GpuCounterDescriptor::PIXEL;
|
||||
break;
|
||||
case Counter::Units::Fragment:
|
||||
units = GpuCounterDescriptor::FRAGMENT;
|
||||
break;
|
||||
default:
|
||||
assert(false && "Missing counter units type!");
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ std::unordered_map<std::string, std::unique_ptr<Driver>> create_supported_driver
|
|||
|
||||
#ifdef PPS_PANFROST
|
||||
map.emplace("panfrost", std::make_unique<PanfrostDriver>());
|
||||
map.emplace("panthor", std::make_unique<PanfrostDriver>());
|
||||
#endif // PPS_PANFROST
|
||||
|
||||
#ifdef PPS_V3D
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue