radv/perfcounter: add GFX11 performance counter selectors

GFX11 reorganizes the shader perfcounter blocks: wave counts move from
SQ to the SQG registers (still mapped as the SQ block in ac/), while
per-instruction counters move from SQ to the new SQ_WGP block.

Add GFX11-specific selector enums using the new block assignments and
branch radv_query_perfcounter_descs to select them on GFX11+. GL2C,
GL1C, and TCP selectors are unchanged between GFX10.3 and GFX11.

The "Instructions" (total count) counter is dropped on GFX11 as there
is no direct SQ_WGP equivalent for INSTS_ALL.

Selector indices verified against gpu_performance_api's
gpa_hw_counter_gfx11.cc.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41157>
This commit is contained in:
Matt Turner 2026-04-24 00:10:16 -04:00 committed by Marge Bot
parent 703de21af8
commit 8499d86b94

View file

@ -203,6 +203,18 @@ enum {
SQ_PERF_SEL_INST_CYCLES_VALU_GFX10 = CTR(SQ, 0x75),
};
/* GFX11 variants of the shader performance counter selectors.
 *
 * The wave counter is selected from the SQ block, while the per-instruction
 * counters (GDS/LDS/SALU/SMEM/VALU/TEX load/TEX store) and the VALU
 * instruction-cycle counter are selected from the SQ_WGP block.
 *
 * NOTE(review): the selector indices are hardware-specific magic values;
 * confirm any change against gpu_performance_api's gpa_hw_counter_gfx11.cc.
 */
enum {
SQ_PERF_SEL_WAVES_GFX11 = CTR(SQ, 0x14),
SQ_PERF_SEL_INSTS_GDS_GFX11 = CTR(SQ_WGP, 0x36),
SQ_PERF_SEL_INSTS_LDS_GFX11 = CTR(SQ_WGP, 0x39),
SQ_PERF_SEL_INSTS_SALU_GFX11 = CTR(SQ_WGP, 0x3a),
SQ_PERF_SEL_INSTS_SMEM_GFX11 = CTR(SQ_WGP, 0x3b),
SQ_PERF_SEL_INSTS_VALU_GFX11 = CTR(SQ_WGP, 0x3e),
SQ_PERF_SEL_INSTS_TEX_LOAD_GFX11 = CTR(SQ_WGP, 0x42),
SQ_PERF_SEL_INSTS_TEX_STORE_GFX11 = CTR(SQ_WGP, 0x43),
SQ_PERF_SEL_INST_CYCLES_VALU_GFX11 = CTR(SQ_WGP, 0x67),
};
enum {
TCP_PERF_SEL_REQ_GFX10 = CTR(TCP, 0x9),
TCP_PERF_SEL_REQ_MISS_GFX10 = CTR(TCP, 0x12),
@ -219,30 +231,61 @@ radv_query_perfcounter_descs(struct radv_physical_device *pdev, uint32_t *count,
ADD_PC(RADV_PC_OP_MAX, CYCLES, "GPU active cycles", "GRBM", "cycles the GPU is active processing a command buffer.",
GPU_CYCLES, GRBM_PERF_SEL_GUI_ACTIVE);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "Waves", "Shaders", "Number of waves executed", SHADER_WAVES, SQ_PERF_SEL_WAVES);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "Instructions", "Shaders", "Number of Instructions executed", SHADER_INSTRUCTIONS,
SQ_PERF_SEL_INSTS_ALL_GFX10);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "VALU Instructions", "Shaders", "Number of VALU Instructions executed",
SHADER_INSTRUCTIONS_VALU, SQ_PERF_SEL_INSTS_VALU_GFX10);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "SALU Instructions", "Shaders", "Number of SALU Instructions executed",
SHADER_INSTRUCTIONS_SALU, SQ_PERF_SEL_INSTS_SALU_GFX10);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Load Instructions", "Shaders", "Number of VMEM load instructions executed",
SHADER_INSTRUCTIONS_VMEM_LOAD, SQ_PERF_SEL_INSTS_TEX_LOAD_GFX10);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "SMEM Load Instructions", "Shaders", "Number of SMEM load instructions executed",
SHADER_INSTRUCTIONS_SMEM_LOAD, SQ_PERF_SEL_INSTS_SMEM_GFX10);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Store Instructions", "Shaders", "Number of VMEM store instructions executed",
SHADER_INSTRUCTIONS_VMEM_STORE, SQ_PERF_SEL_INSTS_TEX_STORE_GFX10);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "LDS Instructions", "Shaders", "Number of LDS Instructions executed",
SHADER_INSTRUCTIONS_LDS, SQ_PERF_SEL_INSTS_LDS_GFX10);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "GDS Instructions", "Shaders", "Number of GDS Instructions executed",
SHADER_INSTRUCTIONS_GDS, SQ_PERF_SEL_INSTS_GDS_GFX10);
if (pdev->info.gfx_level >= GFX11) {
ADD_PC(RADV_PC_OP_SUM, GENERIC, "Waves", "Shaders", "Number of waves executed", SHADER_WAVES,
SQ_PERF_SEL_WAVES_GFX11);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "VALU Instructions", "Shaders", "Number of VALU Instructions executed",
SHADER_INSTRUCTIONS_VALU, SQ_PERF_SEL_INSTS_VALU_GFX11);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "SALU Instructions", "Shaders", "Number of SALU Instructions executed",
SHADER_INSTRUCTIONS_SALU, SQ_PERF_SEL_INSTS_SALU_GFX11);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Load Instructions", "Shaders", "Number of VMEM load instructions executed",
SHADER_INSTRUCTIONS_VMEM_LOAD, SQ_PERF_SEL_INSTS_TEX_LOAD_GFX11);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "SMEM Load Instructions", "Shaders", "Number of SMEM load instructions executed",
SHADER_INSTRUCTIONS_SMEM_LOAD, SQ_PERF_SEL_INSTS_SMEM_GFX11);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Store Instructions", "Shaders",
"Number of VMEM store instructions executed", SHADER_INSTRUCTIONS_VMEM_STORE,
SQ_PERF_SEL_INSTS_TEX_STORE_GFX11);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "LDS Instructions", "Shaders", "Number of LDS Instructions executed",
SHADER_INSTRUCTIONS_LDS, SQ_PERF_SEL_INSTS_LDS_GFX11);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "GDS Instructions", "Shaders", "Number of GDS Instructions executed",
SHADER_INSTRUCTIONS_GDS, SQ_PERF_SEL_INSTS_GDS_GFX11);
ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "VALU Busy", "Shader Utilization",
"Percentage of time the VALU units are busy", SHADER_VALU_BUSY, SQ_PERF_SEL_INST_CYCLES_VALU_GFX10,
CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_SIMD);
ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "SALU Busy", "Shader Utilization",
"Percentage of time the SALU units are busy", SHADER_SALU_BUSY, SQ_PERF_SEL_INSTS_SALU_GFX10,
CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_CUS);
ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "VALU Busy", "Shader Utilization",
"Percentage of time the VALU units are busy", SHADER_VALU_BUSY, SQ_PERF_SEL_INST_CYCLES_VALU_GFX11,
CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_SIMD);
ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "SALU Busy", "Shader Utilization",
"Percentage of time the SALU units are busy", SHADER_SALU_BUSY, SQ_PERF_SEL_INSTS_SALU_GFX11,
CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_CUS);
} else {
ADD_PC(RADV_PC_OP_SUM, GENERIC, "Waves", "Shaders", "Number of waves executed", SHADER_WAVES,
SQ_PERF_SEL_WAVES);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "Instructions", "Shaders", "Number of Instructions executed",
SHADER_INSTRUCTIONS, SQ_PERF_SEL_INSTS_ALL_GFX10);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "VALU Instructions", "Shaders", "Number of VALU Instructions executed",
SHADER_INSTRUCTIONS_VALU, SQ_PERF_SEL_INSTS_VALU_GFX10);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "SALU Instructions", "Shaders", "Number of SALU Instructions executed",
SHADER_INSTRUCTIONS_SALU, SQ_PERF_SEL_INSTS_SALU_GFX10);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Load Instructions", "Shaders",
"Number of VMEM load instructions executed", SHADER_INSTRUCTIONS_VMEM_LOAD,
SQ_PERF_SEL_INSTS_TEX_LOAD_GFX10);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "SMEM Load Instructions", "Shaders",
"Number of SMEM load instructions executed", SHADER_INSTRUCTIONS_SMEM_LOAD,
SQ_PERF_SEL_INSTS_SMEM_GFX10);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Store Instructions", "Shaders",
"Number of VMEM store instructions executed", SHADER_INSTRUCTIONS_VMEM_STORE,
SQ_PERF_SEL_INSTS_TEX_STORE_GFX10);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "LDS Instructions", "Shaders", "Number of LDS Instructions executed",
SHADER_INSTRUCTIONS_LDS, SQ_PERF_SEL_INSTS_LDS_GFX10);
ADD_PC(RADV_PC_OP_SUM, GENERIC, "GDS Instructions", "Shaders", "Number of GDS Instructions executed",
SHADER_INSTRUCTIONS_GDS, SQ_PERF_SEL_INSTS_GDS_GFX10);
ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "VALU Busy", "Shader Utilization",
"Percentage of time the VALU units are busy", SHADER_VALU_BUSY, SQ_PERF_SEL_INST_CYCLES_VALU_GFX10,
CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_SIMD);
ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "SALU Busy", "Shader Utilization",
"Percentage of time the SALU units are busy", SHADER_SALU_BUSY, SQ_PERF_SEL_INSTS_SALU_GFX10,
CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_CUS);
}
if (pdev->info.gfx_level >= GFX10_3) {
ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory", "Number of bytes read from VRAM",