diff --git a/include/drm-uapi/panthor_drm.h b/include/drm-uapi/panthor_drm.h index e238c6264fa..174f39dc2da 100644 --- a/include/drm-uapi/panthor_drm.h +++ b/include/drm-uapi/panthor_drm.h @@ -154,6 +154,9 @@ enum drm_panthor_ioctl_id { * This is useful for imported BOs. */ DRM_PANTHOR_BO_QUERY_INFO, + + /** @DRM_PANTHOR_PERF_CONTROL: Control a performance counter session. */ + DRM_PANTHOR_PERF_CONTROL, }; /** @@ -253,6 +256,9 @@ enum drm_panthor_dev_query_type { * @DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO: Query allowed group priorities information. */ DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO, + + /** @DRM_PANTHOR_DEV_QUERY_PERF_INFO: Query performance counter interface information. */ + DRM_PANTHOR_DEV_QUERY_PERF_INFO, }; /** @@ -445,6 +451,138 @@ struct drm_panthor_group_priorities_info { __u8 pad[3]; }; +/** + * enum drm_panthor_perf_feat_flags - Performance counter configuration feature flags. + */ +enum drm_panthor_perf_feat_flags { + /** @DRM_PANTHOR_PERF_BLOCK_STATES_SUPPORT: Coarse-grained block states are supported. */ + DRM_PANTHOR_PERF_BLOCK_STATES_SUPPORT = 1 << 0, +}; + +/** + * enum drm_panthor_perf_block_type - Performance counter supported block types. + */ +enum drm_panthor_perf_block_type { + /** @DRM_PANTHOR_PERF_BLOCK_METADATA: Internal use only. */ + DRM_PANTHOR_PERF_BLOCK_METADATA = 0, + + /** @DRM_PANTHOR_PERF_BLOCK_FW: The FW counter block. */ + DRM_PANTHOR_PERF_BLOCK_FW, + + /** @DRM_PANTHOR_PERF_BLOCK_CSHW: The CSHW counter block. */ + DRM_PANTHOR_PERF_BLOCK_CSHW, + + /** @DRM_PANTHOR_PERF_BLOCK_TILER: The tiler counter block. */ + DRM_PANTHOR_PERF_BLOCK_TILER, + + /** @DRM_PANTHOR_PERF_BLOCK_MEMSYS: A memsys counter block. */ + DRM_PANTHOR_PERF_BLOCK_MEMSYS, + + /** @DRM_PANTHOR_PERF_BLOCK_SHADER: A shader core counter block. */ + DRM_PANTHOR_PERF_BLOCK_SHADER, + + /** @DRM_PANTHOR_PERF_BLOCK_FIRST: Internal use only.
*/ + DRM_PANTHOR_PERF_BLOCK_FIRST = DRM_PANTHOR_PERF_BLOCK_FW, + + /** @DRM_PANTHOR_PERF_BLOCK_LAST: Internal use only. */ + DRM_PANTHOR_PERF_BLOCK_LAST = DRM_PANTHOR_PERF_BLOCK_SHADER, + + /** @DRM_PANTHOR_PERF_BLOCK_MAX: Internal use only. */ + DRM_PANTHOR_PERF_BLOCK_MAX = DRM_PANTHOR_PERF_BLOCK_LAST + 1, +}; + +/** + * enum drm_panthor_perf_clock - Identifier of the clock used to produce the cycle count values + * in a given block. + * + * Since the integrator has the choice of using one or more clocks, there may be some confusion + * as to which blocks are counted by which clock values unless this information is explicitly + * provided as part of every block sample. Not every single clock here can be used: in the simplest + * case, all cycle counts will be associated with the top-level clock. + */ +enum drm_panthor_perf_clock { + /** @DRM_PANTHOR_PERF_CLOCK_TOPLEVEL: Top-level CSF clock. */ + DRM_PANTHOR_PERF_CLOCK_TOPLEVEL, + + /** + * @DRM_PANTHOR_PERF_CLOCK_COREGROUP: Core group clock, responsible for the MMU, L2 + * caches and the tiler. + */ + DRM_PANTHOR_PERF_CLOCK_COREGROUP, + + /** @DRM_PANTHOR_PERF_CLOCK_SHADER: Clock for the shader cores. */ + DRM_PANTHOR_PERF_CLOCK_SHADER, +}; + +/** + * struct drm_panthor_perf_info - Performance counter interface information + * + * Structure grouping all queryable information relating to the performance counter + * interfaces. + */ +struct drm_panthor_perf_info { + /** + * @counters_per_block: The number of 8-byte counters available in a block. + */ + __u32 counters_per_block; + + /** + * @sample_header_size: The size of the header struct available at the beginning + * of every sample. + */ + __u32 sample_header_size; + + /** + * @block_header_size: The size of the header struct inline with the counters for a + * single block.
+ */ + __u32 block_header_size; + + /** + * @sample_size: The size of a fully annotated sample, starting with a sample header + * of size @sample_header_size bytes, and all available blocks for the current + * configuration, each comprised of @counters_per_block 64-bit counters and + * a block header of @block_header_size bytes. + * + * The user must use this field to allocate size for the ring buffer. In + * the case of new blocks being added, an old userspace can always use + * this field and ignore any blocks it does not know about. + */ + __u32 sample_size; + + /** @flags: Combination of drm_panthor_perf_feat_flags flags. */ + __u32 flags; + + /** + * @supported_clocks: Bitmask of the clocks supported by the GPU. + * + * Each bit represents a variant of the enum drm_panthor_perf_clock. + * + * For the same GPU, different implementers may have different clocks for the same hardware + * block. At the moment, up to three clocks are supported, and any clocks that are present + * will be reported here. + */ + __u32 supported_clocks; + + /** @fw_blocks: Number of FW blocks available. */ + __u32 fw_blocks; + + /** @cshw_blocks: Number of CSHW blocks available. */ + __u32 cshw_blocks; + + /** @tiler_blocks: Number of tiler blocks available. */ + __u32 tiler_blocks; + + /** @memsys_blocks: Number of memsys blocks available. */ + __u32 memsys_blocks; + + /** @shader_blocks: Number of shader core blocks available. */ + __u32 shader_blocks; + + /** @pad: MBZ. */ + __u32 pad; +}; + /** * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY */ @@ -1187,6 +1325,434 @@ struct drm_panthor_bo_query_info { __u32 pad; }; +/** + * DOC: Performance counter decoding in userspace. + * + * Each sample will be exposed to userspace in the following manner: + * + * +--------+--------+------------------------+--------+-------------------------+-----+ + * | Sample | Block | Block | Block | Block | ...
| + * | header | header | counters | header | counters | | + * +--------+--------+------------------------+--------+-------------------------+-----+ + * + * Each sample will start with a sample header of type struct drm_panthor_perf_sample_header, + * providing sample-wide information like the start and end timestamps, the counter set currently + * configured, and any errors that may have occurred during sampling. + * + * After the fixed size header, the sample will consist of blocks of + * drm_panthor_perf_info::counters_per_block 64-bit counters, each prefaced with a header of + * its own (struct drm_panthor_perf_block_header), indicating the source block type and the clock + * whose cycle count, stored in the sample header, normalizes cycle values within that block. + */ + +/** + * enum drm_panthor_perf_block_state - Bitmask of the power and execution states that an individual + * hardware block went through in a sampling period. + * + * Because the sampling period is controlled from userspace, the block may undergo multiple + * state transitions, so this must be interpreted as one or more such transitions occurring. + */ +enum drm_panthor_perf_block_state { + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_UNKNOWN: The state of this block was unknown during + * the sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_UNKNOWN = 0, + + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_ON: This block was powered on for some or all of + * the sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_ON = 1 << 0, + + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_OFF: This block was powered off for some or all of the + * sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_OFF = 1 << 1, + + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_AVAILABLE: This block was available for execution for + * some or all of the sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_AVAILABLE = 1 << 2, + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_UNAVAILABLE: This block was unavailable for execution for + * some or all of the sampling period.
+ */ + DRM_PANTHOR_PERF_BLOCK_STATE_UNAVAILABLE = 1 << 3, + + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_NORMAL: This block was executing in normal mode + * for some or all of the sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_NORMAL = 1 << 4, + + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_PROTECTED: This block was executing in protected mode + * for some or all of the sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_PROTECTED = 1 << 5, +}; + +/** + * struct drm_panthor_perf_block_header - Header present before every block in the + * sample ringbuffer. + */ +struct drm_panthor_perf_block_header { + /** @block_type: Type of the block. */ + __u8 block_type; + + /** @block_idx: Block index. */ + __u8 block_idx; + + /** + * @block_states: Coarse-grained block transitions, bitmask of enum + * drm_panthor_perf_block_state. + */ + __u8 block_states; + + /** + * @clock: Clock used to produce the cycle count for this block, taken from + * enum drm_panthor_perf_clock. The cycle counts are stored in the sample header. + */ + __u8 clock; + + /** @pad: MBZ. */ + __u8 pad[4]; + + /** @enable_mask: Bitmask of counters requested during the session setup. */ + __u64 enable_mask[2]; +}; + +/** + * enum drm_panthor_perf_sample_flags - Sample-wide events that occurred over the sampling + * period. + */ +enum drm_panthor_perf_sample_flags { + /** + * @DRM_PANTHOR_PERF_SAMPLE_OVERFLOW: This sample contains overflows due to the duration + * of the sampling period. + */ + DRM_PANTHOR_PERF_SAMPLE_OVERFLOW = 1 << 0, + + /** + * @DRM_PANTHOR_PERF_SAMPLE_ERROR: This sample encountered an error condition during + * the sample duration. + */ + DRM_PANTHOR_PERF_SAMPLE_ERROR = 1 << 1, +}; + +/** + * struct drm_panthor_perf_sample_header - Header present before every sample. + */ +struct drm_panthor_perf_sample_header { + /** + * @timestamp_start_ns: Earliest timestamp that values in this sample represent, in + * nanoseconds. Derived from CLOCK_MONOTONIC_RAW.
+ */ + __u64 timestamp_start_ns; + + /** + * @timestamp_end_ns: Latest timestamp that values in this sample represent, in + * nanoseconds. Derived from CLOCK_MONOTONIC_RAW. + */ + __u64 timestamp_end_ns; + + /** @block_set: Set of performance counter blocks. */ + __u8 block_set; + + /** @pad: MBZ. */ + __u8 pad[3]; + + /** @flags: Current sample flags, combination of drm_panthor_perf_sample_flags. */ + __u32 flags; + + /** + * @user_data: User data provided as part of the command that triggered this sample. + * + * - Automatic samples (periodic ones or those around non-counting periods or power state + * transitions) will be tagged with the user_data provided as part of the + * DRM_PANTHOR_PERF_COMMAND_START call. + * - Manual samples will be tagged with the user_data provided with the + * DRM_PANTHOR_PERF_COMMAND_SAMPLE call. + * - A session's final automatic sample will be tagged with the user_data provided with the + * DRM_PANTHOR_PERF_COMMAND_STOP call. + */ + __u64 user_data; + + /** + * @toplevel_clock_cycles: The number of cycles elapsed between + * drm_panthor_perf_sample_header::timestamp_start_ns and + * drm_panthor_perf_sample_header::timestamp_end_ns on the top-level clock if the + * corresponding bit is set in drm_panthor_perf_info::supported_clocks. + */ + __u64 toplevel_clock_cycles; + + /** + * @coregroup_clock_cycles: The number of cycles elapsed between + * drm_panthor_perf_sample_header::timestamp_start_ns and + * drm_panthor_perf_sample_header::timestamp_end_ns on the coregroup clock if the + * corresponding bit is set in drm_panthor_perf_info::supported_clocks. + */ + __u64 coregroup_clock_cycles; + + /** + * @shader_clock_cycles: The number of cycles elapsed between + * drm_panthor_perf_sample_header::timestamp_start_ns and + * drm_panthor_perf_sample_header::timestamp_end_ns on the shader core clock if the + * corresponding bit is set in drm_panthor_perf_info::supported_clocks.
+ */ + __u64 shader_clock_cycles; +}; + +/** + * enum drm_panthor_perf_command - Command type passed to the DRM_PANTHOR_PERF_CONTROL + * IOCTL. + */ +enum drm_panthor_perf_command { + /** @DRM_PANTHOR_PERF_COMMAND_SETUP: Create a new performance counter sampling context. */ + DRM_PANTHOR_PERF_COMMAND_SETUP, + + /** @DRM_PANTHOR_PERF_COMMAND_TEARDOWN: Teardown a performance counter sampling context. */ + DRM_PANTHOR_PERF_COMMAND_TEARDOWN, + + /** @DRM_PANTHOR_PERF_COMMAND_START: Start a sampling session on the indicated context. */ + DRM_PANTHOR_PERF_COMMAND_START, + + /** @DRM_PANTHOR_PERF_COMMAND_STOP: Stop the sampling session on the indicated context. */ + DRM_PANTHOR_PERF_COMMAND_STOP, + + /** + * @DRM_PANTHOR_PERF_COMMAND_SAMPLE: Request a manual sample on the indicated context. + * + * When the sampling session is configured with a non-zero sampling frequency, any + * DRM_PANTHOR_PERF_CONTROL calls with this command will be ignored and return an + * -EINVAL. + */ + DRM_PANTHOR_PERF_COMMAND_SAMPLE, +}; + +/** + * struct drm_panthor_perf_control - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL. + */ +struct drm_panthor_perf_control { + /** @cmd: Command from enum drm_panthor_perf_command. */ + __u32 cmd; + + /** + * @handle: session handle. + * + * Returned by the DRM_PANTHOR_PERF_COMMAND_SETUP call. + * It must be used in subsequent commands for the same context. + */ + __u32 handle; + + /** + * @size: size of the command structure. + * + * If the pointer is NULL, the size is updated by the driver to provide the size of the + * output structure. If the pointer is not NULL, the driver will only copy min(size, + * struct_size) to the pointer and update the size accordingly. + */ + __u64 size; + + /** + * @pointer: user pointer to a command type struct, such as + * struct drm_panthor_perf_cmd_start. + */ + __u64 pointer; +}; + +/** + * enum drm_panthor_perf_counter_set - The counter set to be requested from the hardware.
+ * + * The hardware supports a single performance counter set at a time, so requesting any set other + * than the primary may fail if another process is sampling at the same time. + * + * If in doubt, the primary counter set has the most commonly used counters and requires no + * additional permissions to open. + */ +enum drm_panthor_perf_counter_set { + /** + * @DRM_PANTHOR_PERF_SET_PRIMARY: The default set configured on the hardware. + * + * This is the only set for which all counters in all blocks are defined. + */ + DRM_PANTHOR_PERF_SET_PRIMARY, + + /** + * @DRM_PANTHOR_PERF_SET_SECONDARY: The secondary performance counter set. + * + * Some blocks may not have any defined counters for this set, and the block will + * have the UNAVAILABLE block state permanently set in the block header. + * + * Accessing this set requires the calling process to have the CAP_PERFMON capability. + */ + DRM_PANTHOR_PERF_SET_SECONDARY, + + /** + * @DRM_PANTHOR_PERF_SET_TERTIARY: The tertiary performance counter set. + * + * Some blocks may not have any defined counters for this set, and the block will have + * the UNAVAILABLE block state permanently set in the block header. Note that the + * tertiary set has the fewest defined counter blocks. + * + * Accessing this set requires the calling process to have the CAP_PERFMON capability. + */ + DRM_PANTHOR_PERF_SET_TERTIARY, +}; + +/** + * struct drm_panthor_perf_ringbuf_control - Struct used to map in the ring buffer control indices + * into memory shared between user and kernel. + * + */ +struct drm_panthor_perf_ringbuf_control { + /** + * @extract_idx: The index of the latest sample that was processed by userspace. Only + * modifiable by userspace. + */ + __u64 extract_idx; + + /** + * @insert_idx: The index of the latest sample emitted by the kernel. Only modifiable by + * the kernel.
+ */ + __u64 insert_idx; +}; + +/** + * struct drm_panthor_perf_cmd_setup - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL + * when the DRM_PANTHOR_PERF_COMMAND_SETUP command is specified. + */ +struct drm_panthor_perf_cmd_setup { + /** + * @block_set: Set of performance counter blocks, member of + * enum drm_panthor_perf_counter_set. + * + * This is a global configuration and only one set can be active at a time. If + * another client has already requested a counter set, any further requests + * for a different counter set will fail and return an -EBUSY. + * + * If the requested set does not exist, the request will fail and return an -EINVAL. + * + * Some sets have additional requirements to be enabled, and the setup request will + * fail with an -EACCES if these requirements are not satisfied. + */ + __u8 block_set; + + /** @pad: MBZ. */ + __u8 pad[7]; + + /** @fd: eventfd for signalling the availability of a new sample. */ + __u32 fd; + + /** @ringbuf_handle: Handle to the BO to write perf counter sample to. */ + __u32 ringbuf_handle; + + /** + * @control_handle: Handle to the BO containing a contiguous 16 byte range, used for the + * insert and extract indices for the ringbuffer. + */ + __u32 control_handle; + + /** + * @sample_slots: The number of slots available in the userspace-provided BO. Must be + * a power of 2. + * + * If sample_slots * sample_size does not match the BO size, the setup request will fail. + */ + __u32 sample_slots; + + /** + * @control_offset: Offset into the control BO where the insert and extract indices are + * located. + */ + __u64 control_offset; + + /** + * @sample_freq_ns: Period between automatic counter sample collections, in nanoseconds. Zero + * disables periodic collection, and all collection must then be done through explicit + * DRM_PANTHOR_PERF_COMMAND_SAMPLE commands. Non-zero values will disable manual counter + * sampling via the DRM_PANTHOR_PERF_COMMAND_SAMPLE command.
+ * + * Zero only disables software-triggered periodic sampling: hardware will still trigger + * automatic samples on certain events, including shader core power transitions, and + * entries to and exits from non-counting periods. The final stop command will also + * trigger a sample to ensure no data is lost. + */ + __u64 sample_freq_ns; + + /** + * @fw_enable_mask: Bitmask of counters to request from the FW counter block. Any bits + * past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit 0 + * corresponds to counter 0. + */ + __u64 fw_enable_mask[2]; + + /** + * @cshw_enable_mask: Bitmask of counters to request from the CSHW counter block. Any bits + * past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit 0 + * corresponds to counter 0. + */ + __u64 cshw_enable_mask[2]; + + /** + * @tiler_enable_mask: Bitmask of counters to request from the tiler counter block. Any + * bits past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit + * 0 corresponds to counter 0. + */ + __u64 tiler_enable_mask[2]; + + /** + * @memsys_enable_mask: Bitmask of counters to request from the memsys counter blocks. Any + * bits past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit 0 + * corresponds to counter 0. + */ + __u64 memsys_enable_mask[2]; + + /** + * @shader_enable_mask: Bitmask of counters to request from the shader core counter blocks. + * Any bits past the first drm_panthor_perf_info.counters_per_block bits will be ignored. + * Bit 0 corresponds to counter 0. + */ + __u64 shader_enable_mask[2]; +}; + +/** + * struct drm_panthor_perf_cmd_start - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL + * when the DRM_PANTHOR_PERF_COMMAND_START command is specified. + */ +struct drm_panthor_perf_cmd_start { + /** + * @user_data: User provided data that will be attached to automatic samples collected + * until the next DRM_PANTHOR_PERF_COMMAND_STOP.
+ */ + __u64 user_data; +}; + +/** + * struct drm_panthor_perf_cmd_stop - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL + * when the DRM_PANTHOR_PERF_COMMAND_STOP command is specified. + */ +struct drm_panthor_perf_cmd_stop { + /** + * @user_data: User provided data that will be attached to the automatic sample collected + * at the end of this sampling session. + */ + __u64 user_data; +}; + +/** + * struct drm_panthor_perf_cmd_sample - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL + * when the DRM_PANTHOR_PERF_COMMAND_SAMPLE command is specified. + */ +struct drm_panthor_perf_cmd_sample { + /** @user_data: User provided data that will be attached to the sample. */ + __u64 user_data; +}; + /** * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number * @__access: Access type. Must be R, W or RW. @@ -1237,6 +1803,8 @@ enum { DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync), DRM_IOCTL_PANTHOR_BO_QUERY_INFO = DRM_IOCTL_PANTHOR(WR, BO_QUERY_INFO, bo_query_info), + DRM_IOCTL_PANTHOR_PERF_CONTROL = + DRM_IOCTL_PANTHOR(WR, PERF_CONTROL, perf_control) }; #if defined(__cplusplus)