panfrost/perf: Add Gx10 perfcounters

Add manually created Mali-Gx10 counter definitions.

v2:
 - Added the architecture major field.

v3:
 - Swap the order of the shader core and memsys blocks.

v4:
 - G710 -> Gx10, to indicate that all GPUs in this generation are
   supported

Signed-off-by: Lukas Zapolskas <lukas.zapolskas@arm.com>
Co-developed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
This commit is contained in:
Lukas Zapolskas 2024-12-13 17:09:41 +00:00 committed by Christoph Pillmayer
parent 23d265928e
commit 7618984cf5
2 changed files with 176 additions and 1 deletions

175
src/panfrost/perf/Gx10.xml Normal file
View file

@@ -0,0 +1,175 @@
<!--
Copyright © 2025 Arm, Ltd.
Author: Lukas Zapolskas <lukas.zapolskas@arm.com>
SPDX-License-Identifier: MIT
-->
<metrics id="TVAx" arch_major="10">
<category name="CSF" per_cpu="no">
<event offset="4" counter="GPU_ACTIVE" title="GPU active" name="GPU active cycles" description="The number of cycles when the GPU has a workload of any type queued for processing." units="cycles"/>
<event offset="5" advanced="yes" counter="MCU_ACTIVE" title="MCU active" name="MCU active cycles" description="The number of cycles when the CSF front-end MCU is actively processing." units="cycles"/>
<event offset="6" counter="GPU_ITER_ACTIVE" title="Any queue active" name="Any queue active cycles" description="The number of cycles when any GPU queue is active." units="cycles"/>
<event offset="10" advanced="yes" counter="IRQ_ACTIVE" title="GPU interrupt active" name="GPU interrupt pending cycles" description="The number of cycles when the GPU has a pending interrupt." units="cycles"/>
<event offset="11" advanced="yes" counter="GPU_IRQ_COUNT" title="GPU interrupts" name="GPU interrupts" description="The number of interrupts raised by the GPU to the CPU." units="interrupts"/>
<event offset="12" advanced="yes" counter="CACHE_FLUSH_CYCLES" title="L2 cache flush" name="L2 cache flush cycles" description="The number of cycles spent flushing GPU L2 caches." units="cycles"/>
<event offset="13" advanced="yes" counter="CACHE_FLUSH" title="L2 cache flushes" name="L2 cache flush requests" description="The number of GPU L2 cache flushes performed." units="requests"/>
<event offset="16" counter="ITER_TILER_ACTIVE" title="Vertex queued" name="Vertex work queued cycles" description="The number of cycles that the vertex shading queue has work queued." units="cycles"/>
<event offset="17" advanced="yes" counter="ITER_TILER_JOB_COMPLETED" title="Vertex jobs" name="Vertex jobs" description="The number of vertex jobs processed." units="jobs"/>
<event offset="18" advanced="yes" counter="ITER_TILER_IDVS_TASK_COMPLETED" title="Vertex tasks" name="Vertex tasks" description="The number of vertex tasks processed." units="tasks"/>
<event offset="19" advanced="yes" counter="ITER_TILER_TOTAL_IDVS_TASK_ACTIVE_CYCLES" title="Vertex total" name="Total vertex active cycles" description="The total number of vertex shading cycles processed." units="cycles"/>
<event offset="20" advanced="yes" counter="ITER_TILER_IRQ_ACTIVE" title="Vertex queue interrupt active" name="Vertex queue interrupt pending cycles" description="The number of cycles that the vertex queue IRQ was pending." units="cycles"/>
<event offset="22" counter="ITER_TILER_READY_BLOCKED" title="Vertex queue endpoint stall" name="Vertex queue endpoint stall cycles" description="The number of cycles the vertex queue is waiting for endpoints to be assigned." units="cycles"/>
<event offset="23" advanced="yes" counter="ITER_TILER_EP_DRAIN" title="Vertex queue endpoint drain stall" name="Vertex queue endpoint drain stall cycles" description="The number of cycles the vertex queue is waiting for endpoints to drain." units="cycles"/>
<event offset="24" counter="ITER_COMP_ACTIVE" title="Compute queued" name="Compute work queued cycles" description="The number of cycles that the compute queue has work queued." units="cycles"/>
<event offset="25" advanced="yes" counter="ITER_COMP_JOB_COMPLETED" title="Compute jobs" name="Compute jobs" description="The number of compute jobs processed." units="jobs"/>
<event offset="26" advanced="yes" counter="ITER_COMP_TASK_COMPLETED" title="Compute tasks" name="Compute tasks" description="The number of compute tasks processed." units="tasks"/>
<event offset="27" advanced="yes" counter="ITER_COMP_TOTAL_TASK_ACTIVE_CYCLES" title="Compute total" name="Total compute active cycles" description="The total number of compute shading cycles processed." units="cycles"/>
<event offset="28" advanced="yes" counter="ITER_COMP_IRQ_ACTIVE" title="Compute queue interrupt active" name="Compute queue interrupt pending cycles" description="The number of cycles that the compute queue IRQ was pending." units="cycles"/>
<event offset="30" counter="ITER_COMP_READY_BLOCKED" title="Compute queue endpoint stall" name="Compute queue endpoint stall cycles" description="The number of cycles the compute queue is waiting for endpoints to be assigned." units="cycles"/>
<event offset="31" advanced="yes" counter="ITER_COMP_EP_DRAIN" title="Compute queue endpoint drain stall" name="Compute queue endpoint drain stall cycles" description="The number of cycles the compute queue is waiting for endpoints to drain." units="cycles"/>
<event offset="32" counter="ITER_FRAG_ACTIVE" title="Fragment queued" name="Fragment work queued cycles" description="The number of cycles that the fragment queue has work queued." units="cycles"/>
<event offset="33" advanced="yes" counter="ITER_FRAG_JOB_COMPLETED" title="Fragment jobs" name="Fragment jobs" description="The number of fragment jobs processed." units="jobs"/>
<event offset="34" counter="ITER_FRAG_TASK_COMPLETED" title="Fragment tasks" name="Fragment tasks" description="The number of fragment tasks processed." units="tasks"/>
<event offset="35" advanced="yes" counter="ITER_FRAG_TOTAL_TASK_ACTIVE_CYCLES" title="Fragment total" name="Total fragment active cycles" description="The total number of fragment shading cycles processed." units="cycles"/>
<event offset="36" advanced="yes" counter="ITER_FRAG_IRQ_ACTIVE" title="Fragment queue interrupt active" name="Fragment queue interrupt pending cycles" description="The number of cycles that the fragment queue IRQ was pending." units="cycles"/>
<event offset="38" counter="ITER_FRAG_READY_BLOCKED" title="Fragment queue endpoint stall" name="Fragment queue endpoint stall cycles" description="The number of cycles the fragment queue is waiting for endpoints to be assigned." units="cycles"/>
<event offset="40" advanced="yes" counter="CEU_ACTIVE" title="CEU active" name="Command execution unit active cycles" description="The number of cycles that the CEU is processing commands." units="cycles"/>
<event offset="45" advanced="yes" counter="LSU_ACTIVE" title="LSU active" name="Command load/store unit active cycles" description="The number of cycles that the load-store unit is processing commands." units="cycles"/>
<event offset="48" advanced="yes" counter="CSHWIF0_ENABLED" title="CS0 active" name="Command stream 0 active cycles" description="The number of cycles that command stream interface 0 contained an enabled command stream." units="cycles"/>
<event offset="51" advanced="yes" counter="CSHWIF0_WAIT_BLOCKED" title="CS0 wait stall" name="Command stream 0 wait stall cycles" description="The number of cycles that command stream interface 0 was blocked because of a scheduling dependency." units="cycles"/>
<event offset="52" advanced="yes" counter="CSHWIF1_ENABLED" title="CS1 active" name="Command stream 1 active cycles" description="The number of cycles that command stream interface 1 contained an enabled command stream." units="cycles"/>
<event offset="55" advanced="yes" counter="CSHWIF1_WAIT_BLOCKED" title="CS1 wait stall" name="Command stream 1 wait stall cycles" description="The number of cycles that command stream interface 1 was blocked because of a scheduling dependency." units="cycles"/>
<event offset="56" advanced="yes" counter="CSHWIF2_ENABLED" title="CS2 active" name="Command stream 2 active cycles" description="The number of cycles that command stream interface 2 contained an enabled command stream." units="cycles"/>
<event offset="59" advanced="yes" counter="CSHWIF2_WAIT_BLOCKED" title="CS2 wait stall" name="Command stream 2 wait stall cycles" description="The number of cycles that command stream interface 2 was blocked because of a scheduling dependency." units="cycles"/>
<event offset="60" advanced="yes" counter="CSHWIF3_ENABLED" title="CS3 active" name="Command stream 3 active cycles" description="The number of cycles that command stream interface 3 contained an enabled command stream." units="cycles"/>
<event offset="63" advanced="yes" counter="CSHWIF3_WAIT_BLOCKED" title="CS3 wait stall" name="Command stream 3 wait stall cycles" description="The number of cycles that command stream interface 3 was blocked because of a scheduling dependency." units="cycles"/>
</category>
<category name="Tiler" per_cpu="no">
  <!--
  Tiler counter block. Covers tiler activity, input primitive counts by
  type, culling results, internal bus reads, position and varying
  shading requests and stalls, and position/varying cache hit and miss
  counts.
  Offsets listed here are not contiguous; offsets that are skipped have
  no event described in this category.
  -->
  <event offset="4" advanced="yes" counter="TILER_ACTIVE" title="Tiler active" name="Tiler active cycles" description="The number of cycles when the tiler has a workload queued for processing." units="cycles"/>
  <event offset="6" counter="TRIANGLES" title="Triangle primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives"/>
  <event offset="7" counter="LINES" title="Line primitives" name="Line primitives" description="The number of input line primitives." units="primitives"/>
  <event offset="8" counter="POINTS" title="Point primitives" name="Point primitives" description="The number of input point primitives." units="primitives"/>
  <event offset="9" advanced="yes" counter="FRONT_FACING" title="Front-facing primitives" name="Visible front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives"/>
  <event offset="10" advanced="yes" counter="BACK_FACING" title="Back-facing primitives" name="Visible back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives"/>
  <event offset="11" counter="PRIM_VISIBLE" title="Visible primitives" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives"/>
  <event offset="12" counter="PRIM_CULLED" title="Facing or XY plane test culled primitives" name="Facing or XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives"/>
  <event offset="13" counter="PRIM_CLIPPED" title="Z plane test culled primitives" name="Z plane culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives"/>
  <event offset="14" counter="PRIM_SAT_CULLED" title="Sample test culled primitives" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives"/>
  <event offset="17" advanced="yes" counter="BUS_READ" title="Read beats" name="Output internal read beats" description="The number of internal bus data read cycles made by the tiler." units="beats"/>
  <event offset="21" counter="IDVS_POS_SHAD_REQ" title="Position shading requests" name="Tiler position shading requests" description="The number of position shading requests in the tiler geometry flow." units="requests"/>
  <event offset="23" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Position shading stall" name="Tiler position shading stall cycles" description="The number of cycles when the tiler has a stalled position shading request." units="cycles"/>
  <event offset="24" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Position FIFO full stall" name="Tiler position FIFO full cycles" description="The number of cycles when the tiler has a stalled position shading buffer." units="cycles"/>
  <event offset="26" advanced="yes" counter="VCACHE_HIT" title="Position cache hits" name="Position cache hit requests" description="The number of position lookups that result in a hit in the vertex cache." units="requests"/>
  <event offset="27" advanced="yes" counter="VCACHE_MISS" title="Position cache misses" name="Position cache miss requests" description="The number of position lookups that miss in the vertex cache." units="requests"/>
  <event offset="34" advanced="yes" counter="IDVS_VBU_HIT" title="Varying cache hits" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests"/>
  <event offset="35" advanced="yes" counter="IDVS_VBU_MISS" title="Varying cache misses" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests"/>
  <event offset="37" counter="IDVS_VAR_SHAD_REQ" title="Varying shading requests" name="Tiler varying shading requests" description="The number of varying shading requests in the tiler geometry flow." units="requests"/>
  <event offset="38" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Varying shading stall" name="Tiler varying shading stall cycles" description="The number of cycles when the tiler has a stalled varying shading request." units="cycles"/>
</category>
<category name="Memory System" per_cpu="no">
  <!--
  Memory system counter block. Covers MMU lookups and translation table
  reads, L2 cache requests from and to internal requesters, L2 lookups,
  external bus read and write transactions (including outstanding
  transaction and read latency buckets), and external snoops.
  Offsets listed here are not contiguous; offsets that are skipped have
  no event described in this category.
  -->
  <event offset="4" advanced="yes" counter="MMU_REQUESTS" title="MMU lookups" name="MMU lookup requests" description="The number of main MMU address translations performed." units="requests"/>
  <event offset="5" advanced="yes" counter="MMU_TABLE_READS_L3" title="L3 table reads" name="MMU L3 table read requests" description="The number of level 3 translation table reads." units="requests"/>
  <event offset="6" advanced="yes" counter="MMU_TABLE_READS_L2" title="L2 table reads" name="MMU L2 table read requests" description="The number of level 2 translation table reads." units="requests"/>
  <event offset="7" advanced="yes" counter="MMU_HIT_L3" title="L3 table read hits" name="MMU L3 lookup TLB hits" description="The number of level 3 translation table reads that hit in the main MMU TLB." units="requests"/>
  <event offset="8" advanced="yes" counter="MMU_HIT_L2" title="L2 table read hits" name="MMU L2 lookup TLB hits" description="The number of level 2 translation table reads that hit in the main MMU TLB." units="requests"/>
  <event offset="12" advanced="yes" counter="L2_RD_MSG_IN_EVICT" title="Evict requests" name="Input internal evict requests" description="The number of L2 cache line evict requests from internal requesters." units="requests"/>
  <event offset="13" advanced="yes" counter="L2_RD_MSG_IN_CU" title="Clean unique requests" name="Input internal clean unique requests" description="The number of L2 cache line clean unique requests from internal requesters." units="requests"/>
  <event offset="16" advanced="yes" counter="L2_RD_MSG_IN" title="Read requests" name="Input internal read requests" description="The number of L2 cache read requests from internal requesters." units="requests"/>
  <event offset="17" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="Read stall" name="Input internal read stall cycles" description="The number of cycles L2 cache read requests from internal requesters are stalled." units="cycles"/>
  <event offset="18" advanced="yes" counter="L2_WR_MSG_IN" title="Write requests" name="Input internal write requests" description="The number of L2 cache write requests from internal requesters." units="requests"/>
  <event offset="19" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="Write stall" name="Input internal write stall cycles" description="The number of cycles when L2 cache write requests from internal requesters are stalled." units="cycles"/>
  <event offset="20" advanced="yes" counter="L2_SNP_MSG_IN" title="Snoop requests" name="Input internal snoop requests" description="The number of L2 snoop requests from internal requesters." units="requests"/>
  <event offset="21" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="Snoop stall" name="Input internal snoop stall cycles" description="The number of cycles when L2 cache snoop requests from internal requesters are stalled." units="cycles"/>
  <event offset="22" advanced="yes" counter="L2_RD_MSG_OUT" title="L1 read requests" name="Output internal read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal requester." units="requests"/>
  <event offset="23" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L1 read stall" name="Output internal read stall cycles" description="The number of cycles when L1 cache read requests sent by the L2 cache to an internal requester are stalled." units="cycles"/>
  <event offset="24" advanced="yes" counter="L2_WR_MSG_OUT" title="L1 write requests" name="Output internal write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal requester." units="requests"/>
  <event offset="25" advanced="yes" counter="L2_ANY_LOOKUP" title="Any lookups" name="Any lookup requests" description="The number of L2 cache lookups performed." units="requests"/>
  <event offset="26" counter="L2_READ_LOOKUP" title="Read lookups" name="Read lookup requests" description="The number of L2 cache read lookups performed." units="requests"/>
  <event offset="27" counter="L2_WRITE_LOOKUP" title="Write lookups" name="Write lookup requests" description="The number of L2 cache write lookups performed." units="requests"/>
  <event offset="28" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="External snoop lookups" name="Input external snoop lookup requests" description="The number of coherency snoop lookups performed that were triggered by an external requester." units="requests"/>
  <event offset="29" counter="L2_EXT_READ" title="Read transactions" name="Output external read transactions" description="The number of external read transactions." units="transactions"/>
  <event offset="30" advanced="yes" counter="L2_EXT_READ_NOSNP" title="ReadNoSnoop transactions" name="Output external ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions"/>
  <event offset="31" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="ReadUnique transactions" name="Output external ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions"/>
  <event offset="32" counter="L2_EXT_READ_BEATS" title="Read beats" name="Output external read beats" description="The number of external bus data read cycles." units="beats"/>
  <event offset="33" counter="L2_EXT_AR_STALL" title="Read stall" name="Output external read stall cycles" description="The number of cycles when a read is stalled waiting for the external bus." units="cycles"/>
  <event offset="34" advanced="yes" counter="L2_EXT_AR_CNT_Q1" title="0-25% outstanding" name="Output external outstanding reads 0-25%" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
  <event offset="35" advanced="yes" counter="L2_EXT_AR_CNT_Q2" title="25-50% outstanding" name="Output external outstanding reads 25-50%" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
  <event offset="36" advanced="yes" counter="L2_EXT_AR_CNT_Q3" title="50-75% outstanding" name="Output external outstanding reads 50-75%" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
  <event offset="37" counter="L2_EXT_RRESP_0_127" title="0-127 cycles" name="Output external read latency 0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats"/>
  <event offset="38" counter="L2_EXT_RRESP_128_191" title="128-191 cycles" name="Output external read latency 128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats"/>
  <event offset="39" counter="L2_EXT_RRESP_192_255" title="192-255 cycles" name="Output external read latency 192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats"/>
  <event offset="40" counter="L2_EXT_RRESP_256_319" title="256-319 cycles" name="Output external read latency 256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats"/>
  <event offset="41" counter="L2_EXT_RRESP_320_383" title="320-383 cycles" name="Output external read latency 320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats"/>
  <event offset="42" counter="L2_EXT_WRITE" title="Write transactions" name="Output external write transactions" description="The number of external write transactions." units="transactions"/>
  <event offset="43" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="WriteNoSnoopFull transactions" name="Output external WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions"/>
  <event offset="44" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="WriteNoSnoopPartial transactions" name="Output external WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions"/>
  <event offset="45" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="WriteSnoopFull transactions" name="Output external WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions"/>
  <event offset="46" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="WriteSnoopPartial transactions" name="Output external WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions"/>
  <event offset="47" counter="L2_EXT_WRITE_BEATS" title="Write beats" name="Output external write beats" description="The number of external bus data write cycles." units="beats"/>
  <event offset="48" counter="L2_EXT_W_STALL" title="Write stall" name="Output external write stall cycles" description="The number of cycles when a write is stalled waiting for the external bus." units="cycles"/>
  <event offset="49" advanced="yes" counter="L2_EXT_AW_CNT_Q1" title="0-25% outstanding" name="Output external outstanding writes 0-25%" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions"/>
  <event offset="50" advanced="yes" counter="L2_EXT_AW_CNT_Q2" title="25-50% outstanding" name="Output external outstanding writes 25-50%" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions"/>
  <event offset="51" advanced="yes" counter="L2_EXT_AW_CNT_Q3" title="50-75% outstanding" name="Output external outstanding writes 50-75%" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions"/>
  <event offset="52" advanced="yes" counter="L2_EXT_SNOOP" title="Snoop transactions" name="Input external snoop transactions" description="The number of coherency snoops triggered by external requesters." units="transactions"/>
  <event offset="53" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="Snoop stall" name="Input external snoop stall cycles" description="The number of cycles when a coherency snoop triggered by external requester is stalled." units="cycles"/>
</category>
<category name="Shader Core" per_cpu="no">
  <!--
  Shader core counter block. Covers fragment front-end activity and
  rasterization, early and late ZS testing, warp creation, execution
  core pipes (FMA, CVT, SFU), the texture unit, the load/store unit, the
  varying and attribute units, and read/write beat counts per unit.
  Offsets listed here are not contiguous; offsets that are skipped have
  no event described in this category.
  -->
  <event offset="4" counter="FRAG_ACTIVE" title="Fragment active" name="Fragment active cycles" description="The number of cycles when the shader core is processing a fragment workload." units="cycles"/>
  <event offset="5" advanced="yes" counter="FRAG_PRIMITIVES_OUT" title="Loaded primitives" name="Fragment primitives loaded" description="The number of primitives loaded from the tile list by the fragment front-end." units="primitives"/>
  <event offset="6" counter="FRAG_PRIM_RAST" title="Rasterized primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives"/>
  <event offset="7" counter="FRAG_FPK_ACTIVE" title="Fragment FPK buffer active" name="Forward pixel kill buffer active cycles" description="The number of cycles when at least one quad is present in the pre-pipe quad queue." units="cycles"/>
  <event offset="9" counter="FRAG_WARPS" title="Fragment warps" name="Fragment warps" description="The number of fragment warps created." units="warps"/>
  <event offset="10" counter="FRAG_PARTIAL_QUADS_RAST" title="Partial rasterized fine quads" name="Partial rasterized fine quads" description="The number of rasterized fine quads created with partial coverage." units="quads"/>
  <event offset="11" counter="FRAG_QUADS_RAST" title="Rasterized fine quads" name="Rasterized fine quads" description="The number of fine quads generated by the rasterization phase." units="quads"/>
  <event offset="12" counter="FRAG_QUADS_EZS_TEST" title="Early ZS tested quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads"/>
  <event offset="13" counter="FRAG_QUADS_EZS_UPDATE" title="Early ZS updated quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads"/>
  <event offset="14" counter="FRAG_QUADS_EZS_KILL" title="Early ZS killed quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads"/>
  <event offset="15" counter="FRAG_LZS_TEST" title="Late ZS tested quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads"/>
  <event offset="16" counter="FRAG_LZS_KILL" title="Late ZS killed quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads"/>
  <event offset="17" counter="WARP_REG_SIZE_64" title="All register warps" name="Warps using more than 32 registers" description="The number of warps that require more than 32 registers." units="warps"/>
  <event offset="18" counter="FRAG_PTILES" title="Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles"/>
  <event offset="19" counter="FRAG_TRANS_ELIM" title="Killed unchanged tiles" name="Killed unchanged tiles" description="The number of tiles killed by transaction elimination." units="tiles"/>
  <event offset="20" counter="QUAD_FPK_KILLER" title="Occluding quads" name="Occluding quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads"/>
  <event offset="21" counter="FULL_QUAD_WARPS" title="Full warps" name="Full warps" description="The number of warps that have a full thread slot allocation." units="warps"/>
  <event offset="22" counter="COMPUTE_ACTIVE" title="Non-fragment active" name="Non-fragment active cycles" description="The number of cycles when the shader core is processing some non-fragment workload." units="cycles"/>
  <event offset="23" advanced="yes" counter="COMPUTE_TASKS" title="Non-fragment tasks" name="Non-fragment core tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks"/>
  <event offset="24" counter="COMPUTE_WARPS" title="Non-fragment warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps"/>
  <event offset="26" counter="EXEC_CORE_ACTIVE" title="Execution core active" name="Execution core active cycles" description="The number of cycles when the shader core is processing at least one warp." units="cycles"/>
  <event offset="27" counter="EXEC_INSTR_FMA" title="FMA pipe instructions" name="Arithmetic FMA pipe instructions" description="The number of instructions issued to the FMA pipe." units="instructions"/>
  <event offset="28" counter="EXEC_INSTR_CVT" title="CVT pipe instructions" name="Arithmetic CVT pipe instructions" description="The number of instructions issued to the CVT pipe." units="instructions"/>
  <event offset="29" counter="EXEC_INSTR_SFU" title="SFU pipe instructions" name="Arithmetic SFU pipe instructions" description="The number of instructions issued to the SFU pipe." units="instructions"/>
  <event offset="31" counter="EXEC_INSTR_DIVERGED" title="Diverged instructions" name="Diverged instructions" description="The number of instructions run per warp that have control flow divergence." units="instructions"/>
  <event offset="32" advanced="yes" counter="EXEC_ICACHE_MISS" title="Cache misses" name="Instruction cache misses" description="The number of instruction cache misses." units="requests"/>
  <event offset="33" advanced="yes" counter="EXEC_STARVE_ARITH" title="Execution engine starvation" name="Processing unit starvation cycles" description="The number of cycles when the processing unit is starved of work." units="cycles"/>
  <event offset="34" counter="CALL_BLEND_SHADER" title="Blend shader calls" name="Blend shader instructions" description="The number of blend shader invocations run." units="instructions"/>
  <event offset="35" counter="TEX_MSGI_NUM_FLITS" title="Input beats" name="Texture message read beats" description="The number of texture request message data beats." units="beats"/>
  <event offset="36" counter="TEX_DFCH_CLK_STALLED" title="Descriptor stall" name="Texture descriptor stall cycles" description="The number of cycles when a quad is stalled on texture descriptor fetch." units="cycles"/>
  <event offset="37" counter="TEX_TFCH_CLK_STALLED" title="Fetch queue stall" name="Texture fetch stall cycles" description="The number of cycles when a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles"/>
  <event offset="38" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" title="Filtering unit stall" name="Texture filtering stall cycles" description="The number of cycles when the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles"/>
  <event offset="39" counter="TEX_FILT_NUM_OPERATIONS" title="Texture filtering active" name="Texture filtering cycles" description="The number of texture filtering issue cycles." units="cycles"/>
  <event offset="40" counter="TEX_FILT_NUM_FXR_OPERATIONS" title="Full bilinear filter active" name="Texture filtering cycles using full bilinear" description="The number of cycles when the filtering unit is filled with bilinear filtering." units="cycles"/>
  <event offset="41" counter="TEX_FILT_NUM_FST_OPERATIONS" title="Full trilinear filter active" name="Texture filtering cycles using full trilinear" description="The number of cycles when the filtering unit is filled with trilinear filtering." units="cycles"/>
  <event offset="42" advanced="yes" counter="TEX_MSGO_NUM_MSG" title="Texture messages" name="Texture messages" description="The number of output texture messages from the texture unit." units="issues"/>
  <event offset="43" counter="TEX_MSGO_NUM_FLITS" title="Output beats" name="Texture message write beats" description="The number of texture response message data beats." units="beats"/>
  <event offset="44" counter="LS_MEM_READ_FULL" title="Full read" name="Load/store unit full read issues" description="The number of full-width load/store cache reads." units="cycles"/>
  <event offset="45" counter="LS_MEM_READ_SHORT" title="Partial read" name="Load/store unit partial read issues" description="The number of partial-width load/store cache reads." units="cycles"/>
  <event offset="46" counter="LS_MEM_WRITE_FULL" title="Full write" name="Load/store unit full write issues" description="The number of full-width load/store cache writes." units="cycles"/>
  <event offset="47" counter="LS_MEM_WRITE_SHORT" title="Partial write" name="Load/store unit partial write issues" description="The number of partial-width load/store cache writes." units="cycles"/>
  <event offset="48" counter="LS_MEM_ATOMIC" title="Atomic access" name="Load/store unit atomic issues" description="The number of load/store atomic accesses." units="cycles"/>
  <event offset="49" counter="VARY_INSTR" title="Interpolation requests" name="Varying unit instructions" description="The number of warp-width interpolation operations processed by the varying unit." units="requests"/>
  <event offset="50" counter="VARY_SLOT_32" title="32-bit interpolation issues" name="32-bit interpolation slots" description="The number of 32-bit interpolation slots issued by the varying unit." units="issues"/>
  <event offset="51" counter="VARY_SLOT_16" title="16-bit interpolation issues" name="16-bit interpolation slots" description="The number of 16-bit interpolation slots issued by the varying unit." units="issues"/>
  <event offset="52" advanced="yes" counter="ATTR_INSTR" title="Attribute requests" name="Attribute instructions" description="The number of instructions run by the attribute unit." units="instructions"/>
  <event offset="53" counter="SHADER_CORE_ACTIVE" title="Any workload active" name="Any workload active cycles" description="The number of cycles when the shader core is processing either a non-fragment workload or a fragment workload." units="cycles"/>
  <event offset="54" counter="BEATS_RD_FTC" title="Fragment L2 read beats" name="Fragment front-end read beats from L2 cache" description="The number of read beats received by the fixed-function fragment front-end." units="beats"/>
  <event offset="55" counter="BEATS_RD_FTC_EXT" title="Fragment external read beats" name="Fragment front-end read beats from external memory" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access because of an L2 cache miss." units="beats"/>
  <event offset="56" counter="BEATS_RD_LSC" title="Load/store L2 read beats" name="Load/store unit read beats from L2 cache" description="The number of read beats received by the load/store unit." units="beats"/>
  <event offset="57" counter="BEATS_RD_LSC_EXT" title="Load/store external read beats" name="Load/store unit read beats from external memory" description="The number of read beats received by the load/store unit that required an external memory access because of an L2 cache miss." units="beats"/>
  <event offset="58" counter="BEATS_RD_TEX" title="Texture L2 read beats" name="Texture unit read beats from L2 cache" description="The number of read beats received by the texture unit." units="beats"/>
  <event offset="59" counter="BEATS_RD_TEX_EXT" title="Texture external read beats" name="Texture unit read beats from external memory" description="The number of read beats received by the texture unit that required an external memory access because of an L2 cache miss." units="beats"/>
  <event offset="60" advanced="yes" counter="BEATS_RD_OTHER" title="Other L2 read beats" name="Miscellaneous read beats from L2 cache" description="The number of read beats received by a unit that is not specifically identified." units="beats"/>
  <event offset="61" counter="BEATS_WR_LSC_OTHER" title="Load/store other write beats" name="Load/store unit other write beats" description="The number of write beats by the load/store unit that are because of any reason other than write-back." units="beats"/>
  <event offset="62" counter="BEATS_WR_TIB" title="Tile unit write beats" name="Tile unit write beats to L2 memory system" description="The number of write beats sent by the tile write-back unit." units="beats"/>
  <event offset="63" counter="BEATS_WR_LSC_WB" title="Load/store write-back write beats" name="Load/store unit write-back write beats" description="The number of write beats by the load/store unit that are because of write-back." units="beats"/>
</category>
</metrics>

View file

@@ -4,7 +4,7 @@
pan_hw_metrics = [
'G31', 'G51', 'G52', 'G57', 'G68', 'G71', 'G72', 'G76', 'G77',
-'G78', 'T72x', 'T76x', 'T82x', 'T83x', 'T86x', 'T88x',
+'Gx10', 'G78', 'T72x', 'T76x', 'T82x', 'T83x', 'T86x', 'T88x',
]
pan_hw_metrics_xml_files = []