From 8499d86b94f7b9cd6773148af33ab2d597ccf595 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Fri, 24 Apr 2026 00:10:16 -0400 Subject: [PATCH] radv/perfcounter: add GFX11 performance counter selectors GFX11 reorganizes the shader perfcounter blocks: wave counts move from SQ to the SQG registers (still mapped as the SQ block in ac/), while per-instruction counters move from SQ to the new SQ_WGP block. Add GFX11-specific selector enums using the new block assignments and branch radv_query_perfcounter_descs to select them on GFX11+. GL2C, GL1C, and TCP selectors are unchanged between GFX10.3 and GFX11. The "Instructions" (total count) counter is dropped on GFX11 as there is no direct SQ_WGP equivalent for INSTS_ALL. Selector indices verified against gpu_performance_api's gpa_hw_counter_gfx11.cc. Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_perfcounter.c | 89 +++++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 23 deletions(-) diff --git a/src/amd/vulkan/radv_perfcounter.c b/src/amd/vulkan/radv_perfcounter.c index 83721057a93..b08ed45f9aa 100644 --- a/src/amd/vulkan/radv_perfcounter.c +++ b/src/amd/vulkan/radv_perfcounter.c @@ -203,6 +203,18 @@ enum { SQ_PERF_SEL_INST_CYCLES_VALU_GFX10 = CTR(SQ, 0x75), }; +enum { + SQ_PERF_SEL_WAVES_GFX11 = CTR(SQ, 0x14), + SQ_PERF_SEL_INSTS_GDS_GFX11 = CTR(SQ_WGP, 0x36), + SQ_PERF_SEL_INSTS_LDS_GFX11 = CTR(SQ_WGP, 0x39), + SQ_PERF_SEL_INSTS_SALU_GFX11 = CTR(SQ_WGP, 0x3a), + SQ_PERF_SEL_INSTS_SMEM_GFX11 = CTR(SQ_WGP, 0x3b), + SQ_PERF_SEL_INSTS_VALU_GFX11 = CTR(SQ_WGP, 0x3e), + SQ_PERF_SEL_INSTS_TEX_LOAD_GFX11 = CTR(SQ_WGP, 0x42), + SQ_PERF_SEL_INSTS_TEX_STORE_GFX11 = CTR(SQ_WGP, 0x43), + SQ_PERF_SEL_INST_CYCLES_VALU_GFX11 = CTR(SQ_WGP, 0x67), +}; + enum { TCP_PERF_SEL_REQ_GFX10 = CTR(TCP, 0x9), TCP_PERF_SEL_REQ_MISS_GFX10 = CTR(TCP, 0x12), @@ -219,30 +231,61 @@ radv_query_perfcounter_descs(struct radv_physical_device *pdev, uint32_t *count, ADD_PC(RADV_PC_OP_MAX, CYCLES, "GPU active cycles", "GRBM", "cycles the GPU is active processing a command buffer.", GPU_CYCLES, GRBM_PERF_SEL_GUI_ACTIVE); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "Waves", "Shaders", "Number of waves executed", SHADER_WAVES, SQ_PERF_SEL_WAVES); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "Instructions", "Shaders", "Number of Instructions executed", SHADER_INSTRUCTIONS, - SQ_PERF_SEL_INSTS_ALL_GFX10); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "VALU Instructions", "Shaders", "Number of VALU Instructions executed", - SHADER_INSTRUCTIONS_VALU, SQ_PERF_SEL_INSTS_VALU_GFX10); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "SALU Instructions", "Shaders", "Number of SALU Instructions executed", - SHADER_INSTRUCTIONS_SALU, SQ_PERF_SEL_INSTS_SALU_GFX10); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Load Instructions", "Shaders", "Number of VMEM load instructions executed", - SHADER_INSTRUCTIONS_VMEM_LOAD, SQ_PERF_SEL_INSTS_TEX_LOAD_GFX10); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "SMEM Load Instructions", "Shaders", "Number of SMEM load instructions executed", - SHADER_INSTRUCTIONS_SMEM_LOAD, SQ_PERF_SEL_INSTS_SMEM_GFX10); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Store Instructions", "Shaders", "Number of VMEM store instructions executed", - SHADER_INSTRUCTIONS_VMEM_STORE, SQ_PERF_SEL_INSTS_TEX_STORE_GFX10); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "LDS Instructions", "Shaders", "Number of LDS Instructions executed", - SHADER_INSTRUCTIONS_LDS, SQ_PERF_SEL_INSTS_LDS_GFX10); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "GDS Instructions", "Shaders", "Number of GDS Instructions executed", - SHADER_INSTRUCTIONS_GDS, SQ_PERF_SEL_INSTS_GDS_GFX10); + if (pdev->info.gfx_level >= GFX11) { + ADD_PC(RADV_PC_OP_SUM, GENERIC, "Waves", "Shaders", "Number of waves executed", SHADER_WAVES, + SQ_PERF_SEL_WAVES_GFX11); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "VALU Instructions", "Shaders", "Number of VALU Instructions executed", + SHADER_INSTRUCTIONS_VALU, SQ_PERF_SEL_INSTS_VALU_GFX11); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "SALU Instructions", "Shaders", "Number of SALU Instructions executed", + SHADER_INSTRUCTIONS_SALU, SQ_PERF_SEL_INSTS_SALU_GFX11); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Load Instructions", "Shaders", "Number of VMEM load instructions executed", + SHADER_INSTRUCTIONS_VMEM_LOAD, SQ_PERF_SEL_INSTS_TEX_LOAD_GFX11); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "SMEM Load Instructions", "Shaders", "Number of SMEM load instructions executed", + SHADER_INSTRUCTIONS_SMEM_LOAD, SQ_PERF_SEL_INSTS_SMEM_GFX11); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Store Instructions", "Shaders", + "Number of VMEM store instructions executed", SHADER_INSTRUCTIONS_VMEM_STORE, + SQ_PERF_SEL_INSTS_TEX_STORE_GFX11); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "LDS Instructions", "Shaders", "Number of LDS Instructions executed", + SHADER_INSTRUCTIONS_LDS, SQ_PERF_SEL_INSTS_LDS_GFX11); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "GDS Instructions", "Shaders", "Number of GDS Instructions executed", + SHADER_INSTRUCTIONS_GDS, SQ_PERF_SEL_INSTS_GDS_GFX11); - ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "VALU Busy", "Shader Utilization", - "Percentage of time the VALU units are busy", SHADER_VALU_BUSY, SQ_PERF_SEL_INST_CYCLES_VALU_GFX10, - CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_SIMD); - ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "SALU Busy", "Shader Utilization", - "Percentage of time the SALU units are busy", SHADER_SALU_BUSY, SQ_PERF_SEL_INSTS_SALU_GFX10, - CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_CUS); + ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "VALU Busy", "Shader Utilization", + "Percentage of time the VALU units are busy", SHADER_VALU_BUSY, SQ_PERF_SEL_INST_CYCLES_VALU_GFX11, + CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_SIMD); + ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "SALU Busy", "Shader Utilization", + "Percentage of time the SALU units are busy", SHADER_SALU_BUSY, SQ_PERF_SEL_INSTS_SALU_GFX11, + CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_CUS); + } else { + ADD_PC(RADV_PC_OP_SUM, GENERIC, "Waves", "Shaders", "Number of waves executed", SHADER_WAVES, + SQ_PERF_SEL_WAVES); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "Instructions", "Shaders", "Number of Instructions executed", + SHADER_INSTRUCTIONS, SQ_PERF_SEL_INSTS_ALL_GFX10); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "VALU Instructions", "Shaders", "Number of VALU Instructions executed", + SHADER_INSTRUCTIONS_VALU, SQ_PERF_SEL_INSTS_VALU_GFX10); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "SALU Instructions", "Shaders", "Number of SALU Instructions executed", + SHADER_INSTRUCTIONS_SALU, SQ_PERF_SEL_INSTS_SALU_GFX10); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Load Instructions", "Shaders", + "Number of VMEM load instructions executed", SHADER_INSTRUCTIONS_VMEM_LOAD, + SQ_PERF_SEL_INSTS_TEX_LOAD_GFX10); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "SMEM Load Instructions", "Shaders", + "Number of SMEM load instructions executed", SHADER_INSTRUCTIONS_SMEM_LOAD, + SQ_PERF_SEL_INSTS_SMEM_GFX10); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Store Instructions", "Shaders", + "Number of VMEM store instructions executed", SHADER_INSTRUCTIONS_VMEM_STORE, + SQ_PERF_SEL_INSTS_TEX_STORE_GFX10); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "LDS Instructions", "Shaders", "Number of LDS Instructions executed", + SHADER_INSTRUCTIONS_LDS, SQ_PERF_SEL_INSTS_LDS_GFX10); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "GDS Instructions", "Shaders", "Number of GDS Instructions executed", + SHADER_INSTRUCTIONS_GDS, SQ_PERF_SEL_INSTS_GDS_GFX10); + + ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "VALU Busy", "Shader Utilization", + "Percentage of time the VALU units are busy", SHADER_VALU_BUSY, SQ_PERF_SEL_INST_CYCLES_VALU_GFX10, + CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_SIMD); + ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "SALU Busy", "Shader Utilization", + "Percentage of time the SALU units are busy", SHADER_SALU_BUSY, SQ_PERF_SEL_INSTS_SALU_GFX10, + CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_CUS); + } if (pdev->info.gfx_level >= GFX10_3) { ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory", "Number of bytes read from VRAM",