radv,aco: switch to shader statistics framework

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Gitlab: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12756 Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35583>
2025-12-29 03:40:10 +01:00 · 2025-06-17 15:07:04 +01:00 · 2025-06-17 15:07:04 +01:00 · 325dfd809a
commit 325dfd809a
parent 2382d657ec
11 changed files with 110 additions and 138 deletions
--- a/src/amd/compiler/aco_interface.cpp
+++ b/src/amd/compiler/aco_interface.cpp
@ -20,36 +20,6 @@ using namespace aco;

 namespace {

-static const std::array<aco_compiler_statistic_info, aco_num_statistics> statistic_infos = []()
-{
-   std::array<aco_compiler_statistic_info, aco_num_statistics> ret{};
-   ret[aco_statistic_hash] =
-      aco_compiler_statistic_info{"Hash", "CRC32 hash of code and constant data"};
-   ret[aco_statistic_instructions] =
-      aco_compiler_statistic_info{"Instructions", "Instruction count"};
-   ret[aco_statistic_copies] =
-      aco_compiler_statistic_info{"Copies", "Copy instructions created for pseudo-instructions"};
-   ret[aco_statistic_branches] = aco_compiler_statistic_info{"Branches", "Branch instructions"};
-   ret[aco_statistic_latency] =
-      aco_compiler_statistic_info{"Latency", "Issue cycles plus stall cycles"};
-   ret[aco_statistic_inv_throughput] = aco_compiler_statistic_info{
-      "Inverse Throughput", "Estimated busy cycles to execute one wave"};
-   ret[aco_statistic_vmem_clauses] = aco_compiler_statistic_info{
-      "VMEM Clause", "Number of VMEM clauses (includes 1-sized clauses)"};
-   ret[aco_statistic_smem_clauses] = aco_compiler_statistic_info{
-      "SMEM Clause", "Number of SMEM clauses (includes 1-sized clauses)"};
-   ret[aco_statistic_sgpr_presched] =
-      aco_compiler_statistic_info{"Pre-Sched SGPRs", "SGPR usage before scheduling"};
-   ret[aco_statistic_vgpr_presched] =
-      aco_compiler_statistic_info{"Pre-Sched VGPRs", "VGPR usage before scheduling"};
-   ret[aco_statistic_valu] = aco_compiler_statistic_info{"VALU", "Number of VALU instructions"};
-   ret[aco_statistic_salu] = aco_compiler_statistic_info{"SALU", "Number of SALU instructions"};
-   ret[aco_statistic_vmem] = aco_compiler_statistic_info{"VMEM", "Number of VMEM instructions"};
-   ret[aco_statistic_smem] = aco_compiler_statistic_info{"SMEM", "Number of SMEM instructions"};
-   ret[aco_statistic_vopd] = aco_compiler_statistic_info{"VOPD", "Number of VOPD instructions"};
-   return ret;
-}();
-
 static void
 validate(Program* program)
 {
@ -222,8 +192,7 @@ aco_compile_shader_part(const struct aco_compiler_options* options,
   std::unique_ptr<Program> program{new Program};

   program->collect_statistics = options->record_stats;
-   if (program->collect_statistics)
-      memset(program->statistics, 0, sizeof(program->statistics));
+   memset(&program->statistics, 0, sizeof(program->statistics));

   program->debug.func = options->debug.func;
   program->debug.private_data = options->debug.private_data;
@ -262,8 +231,7 @@ aco_compile_shader(const struct aco_compiler_options* options, const struct aco_
   std::unique_ptr<Program> program{new Program};

   program->collect_statistics = options->record_stats;
-   if (program->collect_statistics)
-      memset(program->statistics, 0, sizeof(program->statistics));
+   memset(&program->statistics, 0, sizeof(program->statistics));

   program->debug.func = options->debug.func;
   program->debug.private_data = options->debug.private_data;
@ -289,14 +257,9 @@ aco_compile_shader(const struct aco_compiler_options* options, const struct aco_
   if (options->record_asm)
      disasm = get_disasm_string(program.get(), code, exec_size);

-   size_t stats_size = 0;
-   if (program->collect_statistics)
-      stats_size = aco_num_statistics * sizeof(uint32_t);
-
   (*build_binary)(binary, &config, llvm_ir.c_str(), llvm_ir.size(), disasm.c_str(), disasm.size(),
-                   program->statistics, stats_size, exec_size, code.data(), code.size(),
-                   symbols.data(), symbols.size(), program->debug_info.data(),
-                   program->debug_info.size());
+                   &program->statistics, exec_size, code.data(), code.size(), symbols.data(),
+                   symbols.size(), program->debug_info.data(), program->debug_info.size());
 }

 void
@ -337,8 +300,8 @@ aco_compile_rt_prolog(const struct aco_compiler_options* options,
   if (options->record_asm)
      disasm = get_disasm_string(program.get(), code, exec_size);

-   (*build_prolog)(binary, &config, NULL, 0, disasm.c_str(), disasm.size(), program->statistics, 0,
-                   exec_size, code.data(), code.size(), NULL, 0, NULL, 0);
+   (*build_prolog)(binary, &config, NULL, 0, disasm.c_str(), disasm.size(), NULL, exec_size,
+                   code.data(), code.size(), NULL, 0, NULL, 0);
 }

 void
@ -437,8 +400,8 @@ aco_compile_trap_handler(const struct aco_compiler_options* options,
   if (options->record_asm)
      disasm = get_disasm_string(program.get(), code, exec_size);

-   (*build_binary)(binary, &config, NULL, 0, disasm.c_str(), disasm.size(), program->statistics, 0,
-                   exec_size, code.data(), code.size(), NULL, 0, NULL, 0);
+   (*build_binary)(binary, &config, NULL, 0, disasm.c_str(), disasm.size(), NULL, exec_size,
+                   code.data(), code.size(), NULL, 0, NULL, 0);
 }

 uint64_t
@ -514,8 +477,6 @@ aco_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu)
   }
 }

-const aco_compiler_statistic_info* aco_statistic_infos = statistic_infos.data();
-
 void
 aco_print_asm(const struct radeon_info *info, unsigned wave_size,
              uint32_t *binary, unsigned num_dw)
--- a/src/amd/compiler/aco_interface.h
+++ b/src/amd/compiler/aco_interface.h
@ -8,10 +8,11 @@
 #define ACO_INTERFACE_H

 #include "aco_shader_info.h"
-#include "ac_shader_debug_info.h"

 #include "nir_defines.h"
+#include "util/shader_stats.h"

+#include "ac_shader_debug_info.h"
 #include "amd_family.h"
 #ifdef __cplusplus
 extern "C" {
@ -26,24 +27,17 @@ struct aco_vs_prolog_info;
 struct aco_ps_epilog_info;
 struct radeon_info;

-struct aco_compiler_statistic_info {
-   char name[32];
-   char desc[64];
-};
-
 typedef void(aco_callback)(void** priv_ptr, const struct ac_shader_config* config,
                           const char* llvm_ir_str, unsigned llvm_ir_size, const char* disasm_str,
-                           unsigned disasm_size, uint32_t* statistics, uint32_t stats_size,
-                           uint32_t exec_size, const uint32_t* code, uint32_t code_dw,
-                           const struct aco_symbol* symbols, unsigned num_symbols,
-                           const struct ac_shader_debug_info* debug_info, unsigned debug_info_count);
+                           unsigned disasm_size, struct amd_stats* stats, uint32_t exec_size,
+                           const uint32_t* code, uint32_t code_dw, const struct aco_symbol* symbols,
+                           unsigned num_symbols, const struct ac_shader_debug_info* debug_info,
+                           unsigned debug_info_count);

 typedef void(aco_shader_part_callback)(void** priv_ptr, uint32_t num_sgprs, uint32_t num_vgprs,
                                       const uint32_t* code, uint32_t code_size,
                                       const char* disasm_str, uint32_t disasm_size);

-extern const struct aco_compiler_statistic_info* aco_statistic_infos;
-
 void aco_compile_shader(const struct aco_compiler_options* options,
                        const struct aco_shader_info* info, unsigned shader_count,
                        struct nir_shader* const* shaders, const struct ac_shader_args* args,
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@ -12,6 +12,7 @@
 #include "aco_util.h"

 #include "util/compiler.h"
+#include "util/shader_stats.h"

 #include "ac_binary.h"
 #include "ac_hw_stage.h"
@ -2162,7 +2163,7 @@ public:
   CompilationProgress progress;

   bool collect_statistics = false;
-   uint32_t statistics[aco_num_statistics];
+   amd_stats statistics;

   float_mode next_fp_mode;
   unsigned next_loop_depth = 0;
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@ -1806,8 +1806,7 @@ handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx,
   bool skip_partial_copies = true;
   for (auto it = copy_map.begin();;) {
      if (copy_map.empty()) {
-         ctx->program->statistics[aco_statistic_copies] +=
-            ctx->instructions.size() - num_instructions_before;
+         ctx->program->statistics.copies += ctx->instructions.size() - num_instructions_before;
         return;
      }
      if (it == copy_map.end()) {
@ -2085,8 +2084,7 @@ handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx,
            break;
      }
   }
-   ctx->program->statistics[aco_statistic_copies] +=
-      ctx->instructions.size() - num_instructions_before;
+   ctx->program->statistics.copies += ctx->instructions.size() - num_instructions_before;
 }

 void
@ -2122,7 +2120,7 @@ handle_operands_linear_vgpr(std::map<PhysReg, copy_operation>& copy_map, lower_c
      pi->scratch_sgpr = scratch_sgpr;
   }

-   ctx->program->statistics[aco_statistic_copies] += scratch_sgpr == scc ? 2 : 4;
+   ctx->program->statistics.copies += scratch_sgpr == scc ? 2 : 4;
 }

 void
--- a/src/amd/compiler/aco_statistics.cpp
+++ b/src/amd/compiler/aco_statistics.cpp
@ -470,8 +470,8 @@ collect_presched_stats(Program* program)
   RegisterDemand presched_demand;
   for (Block& block : program->blocks)
      presched_demand.update(block.register_demand);
-   program->statistics[aco_statistic_sgpr_presched] = presched_demand.sgpr;
-   program->statistics[aco_statistic_vgpr_presched] = presched_demand.vgpr;
+   program->statistics.presgprs = presched_demand.sgpr;
+   program->statistics.prevgprs = presched_demand.vgpr;
 }

 /* instructions/branches/vmem_clauses/smem_clauses/cycles */
@ -482,31 +482,31 @@ collect_preasm_stats(Program* program)
      std::set<Instruction*> vmem_clause;
      std::set<Instruction*> smem_clause;

-      program->statistics[aco_statistic_instructions] += block.instructions.size();
+      program->statistics.instrs += block.instructions.size();

      for (aco_ptr<Instruction>& instr : block.instructions) {
         const bool is_branch =
            instr->isSOPP() && instr_info.classes[(int)instr->opcode] == instr_class::branch;
         if (is_branch)
-            program->statistics[aco_statistic_branches]++;
+            program->statistics.branches++;

         if (instr->isVALU() || instr->isVINTRP())
-            program->statistics[aco_statistic_valu]++;
+            program->statistics.valu++;
         if (instr->isSALU() && !instr->isSOPP() &&
             instr_info.classes[(int)instr->opcode] != instr_class::waitcnt)
-            program->statistics[aco_statistic_salu]++;
+            program->statistics.salu++;
         if (instr->isVOPD())
-            program->statistics[aco_statistic_vopd]++;
+            program->statistics.vopd++;

         if ((instr->isVMEM() || instr->isScratch() || instr->isGlobal()) &&
             !instr->operands.empty()) {
            if (std::none_of(vmem_clause.begin(), vmem_clause.end(),
                             [&](Instruction* other)
                             { return should_form_clause(instr.get(), other); }))
-               program->statistics[aco_statistic_vmem_clauses]++;
+               program->statistics.vclause++;
            vmem_clause.insert(instr.get());

-            program->statistics[aco_statistic_vmem]++;
+            program->statistics.vmem++;
         } else {
            vmem_clause.clear();
         }
@ -515,10 +515,10 @@ collect_preasm_stats(Program* program)
            if (std::none_of(smem_clause.begin(), smem_clause.end(),
                             [&](Instruction* other)
                             { return should_form_clause(instr.get(), other); }))
-               program->statistics[aco_statistic_smem_clauses]++;
+               program->statistics.sclause++;
            smem_clause.insert(instr.get());

-            program->statistics[aco_statistic_smem]++;
+            program->statistics.smem++;
         } else {
            smem_clause.clear();
         }
@ -598,8 +598,8 @@ collect_preasm_stats(Program* program)
         program->workgroup_size / (double)align(program->workgroup_size, program->wave_size);
   wave64_per_cycle *= max_utilization;

-   program->statistics[aco_statistic_latency] = round(latency);
-   program->statistics[aco_statistic_inv_throughput] = round(1.0 / wave64_per_cycle);
+   program->statistics.latency = round(latency);
+   program->statistics.invthroughput = round(1.0 / wave64_per_cycle);

   if (debug_flags & DEBUG_PERF_INFO) {
      aco_print_program(program, stderr, print_no_ssa | print_perf_info);
@ -624,7 +624,7 @@ collect_preasm_stats(Program* program)
 void
 collect_postasm_stats(Program* program, const std::vector<uint32_t>& code)
 {
-   program->statistics[aco_statistic_hash] = util_hash_crc32(code.data(), code.size() * 4);
+   program->statistics.hash = util_hash_crc32(code.data(), code.size() * 4);
 }

 Instruction_cycle_info
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@ -814,44 +814,41 @@ radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecut

   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out, pStatistics, pStatisticCount);

-   vk_add_exec_statistic_u64(out, "Driver pipeline hash", "Driver pipeline hash used by RGP", pipeline->pipeline_hash);
-   vk_add_exec_statistic_u64(out, "SGPRs", "Number of SGPR registers allocated per subgroup", shader->config.num_sgprs);
-   vk_add_exec_statistic_u64(out, "VGPRs", "Number of VGPR registers allocated per subgroup", shader->config.num_vgprs);
-   vk_add_exec_statistic_u64(out, "Spilled SGPRs", "Number of SGPR registers spilled per subgroup",
-                             shader->config.spilled_sgprs);
-   vk_add_exec_statistic_u64(out, "Spilled VGPRs", "Number of VGPR registers spilled per subgroup",
-                             shader->config.spilled_vgprs);
-   vk_add_exec_statistic_u64(out, "Code size", "Code size in bytes", shader->exec_size);
-   vk_add_exec_statistic_u64(out, "LDS size", "LDS size in bytes per workgroup",
-                             shader->config.lds_size * lds_increment);
-   vk_add_exec_statistic_u64(out, "Scratch size", "Private memory in bytes per subgroup",
-                             shader->config.scratch_bytes_per_wave);
-   vk_add_exec_statistic_u64(out, "Subgroups per SIMD", "The maximum number of subgroups in flight on a SIMD unit",
-                             shader->max_waves);
+   struct amd_stats stats = {};
+   if (shader->statistics)
+      stats = *shader->statistics;
+   stats.driverhash = pipeline->pipeline_hash;
+   stats.sgprs = shader->config.num_sgprs;
+   stats.vgprs = shader->config.num_vgprs;
+   stats.spillsgprs = shader->config.spilled_sgprs;
+   stats.spillvgprs = shader->config.spilled_vgprs;
+   stats.codesize = shader->exec_size;
+   stats.lds = shader->config.lds_size * lds_increment;
+   stats.scratch = shader->config.scratch_bytes_per_wave;
+   stats.maxwaves = shader->max_waves;

-   uint64_t inputs = 0;
   switch (stage) {
   case MESA_SHADER_VERTEX:
      if (gfx_level <= GFX8 || (!shader->info.vs.as_es && !shader->info.vs.as_ls)) {
         /* VS inputs when VS is a separate stage */
-         inputs += util_bitcount(shader->info.vs.input_slot_usage_mask);
+         stats.inputs += util_bitcount(shader->info.vs.input_slot_usage_mask);
      }
      break;

   case MESA_SHADER_TESS_CTRL:
      if (gfx_level >= GFX9) {
         /* VS inputs when pipeline has tess */
-         inputs += util_bitcount(shader->info.vs.input_slot_usage_mask);
+         stats.inputs += util_bitcount(shader->info.vs.input_slot_usage_mask);
      }

      /* VS -> TCS inputs */
-      inputs += shader->info.tcs.num_linked_inputs;
+      stats.inputs += shader->info.tcs.num_linked_inputs;
      break;

   case MESA_SHADER_TESS_EVAL:
      if (gfx_level <= GFX8 || !shader->info.tes.as_es) {
         /* TCS -> TES inputs when TES is a separate stage */
-         inputs += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
+         stats.inputs += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
      }
      break;

@ -863,60 +860,57 @@ radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecut
      if (gfx_level >= GFX9) {
         if (shader->info.gs.es_type == MESA_SHADER_VERTEX) {
            /* VS inputs when pipeline has GS but no tess */
-            inputs += util_bitcount(shader->info.vs.input_slot_usage_mask);
+            stats.inputs += util_bitcount(shader->info.vs.input_slot_usage_mask);
         } else if (shader->info.gs.es_type == MESA_SHADER_TESS_EVAL) {
            /* TCS -> TES inputs when pipeline has GS */
-            inputs += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
+            stats.inputs += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
         }
      }

      /* VS -> GS or TES -> GS inputs */
-      inputs += shader->info.gs.num_linked_inputs;
+      stats.inputs += shader->info.gs.num_linked_inputs;
      break;

   case MESA_SHADER_FRAGMENT:
-      inputs += shader->info.ps.num_inputs;
+      stats.inputs += shader->info.ps.num_inputs;
      break;

   default:
      /* Other stages don't have IO or we are not interested in them. */
      break;
   }
-   vk_add_exec_statistic_u64(out, "Combined inputs",
-                             "Number of input slots reserved for the shader (including merged stages)", inputs);

-   uint64_t outputs = 0;
   switch (stage) {
   case MESA_SHADER_VERTEX:
      if (!shader->info.vs.as_ls && !shader->info.vs.as_es) {
         /* VS -> FS outputs. */
-         outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
-                    shader->info.outinfo.prim_param_exports;
+         stats.outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
+                          shader->info.outinfo.prim_param_exports;
      } else if (gfx_level <= GFX8) {
         /* VS -> TCS, VS -> GS outputs on GFX6-8 */
-         outputs += shader->info.vs.num_linked_outputs;
+         stats.outputs += shader->info.vs.num_linked_outputs;
      }
      break;

   case MESA_SHADER_TESS_CTRL:
      if (gfx_level >= GFX9) {
         /* VS -> TCS outputs on GFX9+ */
-         outputs += shader->info.vs.num_linked_outputs;
+         stats.outputs += shader->info.vs.num_linked_outputs;
      }

      /* TCS -> TES outputs */
-      outputs += shader->info.tcs.io_info.highest_remapped_vram_output +
-                 shader->info.tcs.io_info.highest_remapped_vram_patch_output;
+      stats.outputs += shader->info.tcs.io_info.highest_remapped_vram_output +
+                       shader->info.tcs.io_info.highest_remapped_vram_patch_output;
      break;

   case MESA_SHADER_TESS_EVAL:
      if (!shader->info.tes.as_es) {
         /* TES -> FS outputs */
-         outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
-                    shader->info.outinfo.prim_param_exports;
+         stats.outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
+                          shader->info.outinfo.prim_param_exports;
      } else if (gfx_level <= GFX8) {
         /* TES -> GS outputs on GFX6-8 */
-         outputs += shader->info.tes.num_linked_outputs;
+         stats.outputs += shader->info.tes.num_linked_outputs;
      }
      break;

@ -928,48 +922,41 @@ radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecut
      if (gfx_level >= GFX9) {
         if (shader->info.gs.es_type == MESA_SHADER_VERTEX) {
            /* VS -> GS outputs on GFX9+ */
-            outputs += shader->info.vs.num_linked_outputs;
+            stats.outputs += shader->info.vs.num_linked_outputs;
         } else if (shader->info.gs.es_type == MESA_SHADER_TESS_EVAL) {
            /* TES -> GS outputs on GFX9+ */
-            outputs += shader->info.tes.num_linked_outputs;
+            stats.outputs += shader->info.tes.num_linked_outputs;
         }
      }

      if (shader->info.is_ngg) {
         /* GS -> FS outputs (GFX10+ NGG) */
-         outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
-                    shader->info.outinfo.prim_param_exports;
+         stats.outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
+                          shader->info.outinfo.prim_param_exports;
      } else {
         /* GS -> FS outputs (GFX6-10.3 legacy) */
-         outputs += shader->info.gs.gsvs_vertex_size / 16;
+         stats.outputs += shader->info.gs.gsvs_vertex_size / 16;
      }
      break;

   case MESA_SHADER_MESH:
      /* MS -> FS outputs */
-      outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
-                 shader->info.outinfo.prim_param_exports;
+      stats.outputs += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
+                       shader->info.outinfo.prim_param_exports;
      break;

   case MESA_SHADER_FRAGMENT:
-      outputs += DIV_ROUND_UP(util_bitcount(shader->info.ps.colors_written), 4) + !!shader->info.ps.writes_z +
-                 !!shader->info.ps.writes_stencil + !!shader->info.ps.writes_sample_mask +
-                 !!shader->info.ps.writes_mrt0_alpha;
+      stats.outputs += DIV_ROUND_UP(util_bitcount(shader->info.ps.colors_written), 4) + !!shader->info.ps.writes_z +
+                       !!shader->info.ps.writes_stencil + !!shader->info.ps.writes_sample_mask +
+                       !!shader->info.ps.writes_mrt0_alpha;
      break;

   default:
      /* Other stages don't have IO or we are not interested in them. */
      break;
   }
-   vk_add_exec_statistic_u64(out, "Combined outputs",
-                             "Number of output slots reserved for the shader (including merged stages)", outputs);

-   if (shader->statistics) {
-      for (unsigned i = 0; i < aco_num_statistics; i++) {
-         const struct aco_compiler_statistic_info *info = &aco_statistic_infos[i];
-         vk_add_exec_statistic_u64(out, info->name, info->desc, shader->statistics[i]);
-      }
-   }
+   vk_add_amd_stats(out, &stats);

   return vk_outarray_status(&out);
 }
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@ -95,7 +95,7 @@ radv_shader_cache_deserialize(struct vk_pipeline_cache *cache, const void *key_d
 void
 radv_shader_serialize(struct radv_shader *shader, struct blob *blob)
 {
-   size_t stats_size = shader->statistics ? aco_num_statistics * sizeof(uint32_t) : 0;
+   size_t stats_size = shader->statistics ? sizeof(struct amd_stats) : 0;
   size_t code_size = shader->code_size;
   uint32_t total_size = sizeof(struct radv_shader_binary_legacy) + code_size + stats_size;

--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@ -2984,14 +2984,15 @@ radv_dump_nir_shaders(const struct radv_instance *instance, struct nir_shader *c

 static void
 radv_aco_build_shader_binary(void **bin, const struct ac_shader_config *config, const char *llvm_ir_str,
-                             unsigned llvm_ir_size, const char *disasm_str, unsigned disasm_size, uint32_t *statistics,
-                             uint32_t stats_size, uint32_t exec_size, const uint32_t *code, uint32_t code_dw,
+                             unsigned llvm_ir_size, const char *disasm_str, unsigned disasm_size,
+                             struct amd_stats *statistics, uint32_t exec_size, const uint32_t *code, uint32_t code_dw,
                             const struct aco_symbol *symbols, unsigned num_symbols,
                             const struct ac_shader_debug_info *debug_info, unsigned debug_info_count)
 {
   struct radv_shader_binary **binary = (struct radv_shader_binary **)bin;

   uint32_t debug_info_size = debug_info_count * sizeof(struct ac_shader_debug_info);
+   uint32_t stats_size = statistics ? sizeof(struct amd_stats) : 0;

   size_t size = llvm_ir_size;

@ -3019,7 +3020,7 @@ radv_aco_build_shader_binary(void **bin, const struct ac_shader_config *config,
   struct radv_shader_binary_layout layout = radv_shader_binary_get_layout(legacy_binary);

   if (stats_size)
-      memcpy(layout.stats, statistics, stats_size);
+      amd_stats_serialize(layout.stats, statistics);

   memcpy(layout.code, code, code_dw * sizeof(uint32_t));

--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@ -12,6 +12,7 @@
 #define RADV_SHADER_H

 #include "util/mesa-blake3.h"
+#include "util/shader_stats.h"
 #include "util/u_math.h"
 #include "vulkan/vulkan.h"
 #include "ac_binary.h"
@ -460,7 +461,7 @@ struct radv_shader {
   char *nir_string;
   char *disasm_string;
   char *ir_string;
-   uint32_t *statistics;
+   struct amd_stats *statistics;
   struct ac_shader_debug_info *debug_info;
   uint32_t debug_info_count;
 };
--- a/src/gallium/drivers/radeonsi/si_shader_aco.c
+++ b/src/gallium/drivers/radeonsi/si_shader_aco.c
@ -105,7 +105,7 @@ si_fill_aco_shader_info(struct si_shader *shader, struct aco_shader_info *info,
 static void
 si_aco_build_shader_binary(void **data, const struct ac_shader_config *config,
                           const char *llvm_ir_str, unsigned llvm_ir_size, const char *disasm_str,
-                           unsigned disasm_size, uint32_t *statistics, uint32_t stats_size,
+                           unsigned disasm_size, struct amd_stats *statistics,
                           uint32_t exec_size, const uint32_t *code, uint32_t code_dw,
                           const struct aco_symbol *symbols, unsigned num_symbols,
                           const struct ac_shader_debug_info *debug_info, unsigned debug_info_count)
--- a/src/util/shader_stats.xml
+++ b/src/util/shader_stats.xml
@ -90,4 +90,33 @@
      <stat name="TMU Fills" display="Fills">Number of times a register was filled from memory</stat>
      <stat name="QPU Read Stalls" display="Read Stalls">Number of cycles the QPU stalls for a register read dependency</stat>
   </isa>
+
+   <isa name="AMD">
+      <stat name="Driver pipeline hash" display="DriverHash" hash="true" type="u64">Driver pipeline hash used by RGP</stat>
+      <stat name="SGPRs">Number of SGPR registers allocated per subgroup</stat>
+      <stat name="VGPRs">Number of VGPR registers allocated per subgroup</stat>
+      <stat name="Spilled SGPRs" display="SpillSGPRs">Number of SGPR registers spilled per subgroup</stat>
+      <stat name="Spilled VGPRs" display="SpillVGPRs">Number of VGPR registers spilled per subgroup</stat>
+      <stat name="Code size" display="CodeSize">Code size in bytes</stat>
+      <stat name="LDS size" display="LDS">LDS size in bytes per workgroup</stat>
+      <stat name="Scratch size" display="Scratch">Private memory in bytes per subgroup</stat>
+      <stat name="Subgroups per SIMD" display="MaxWaves" more="better">The maximum number of subgroups in flight on a SIMD unit</stat>
+      <stat name="Combined inputs" display="Inputs">Number of input slots reserved for the shader (including merged stages)</stat>
+      <stat name="Combined outputs" display="Outputs">Number of output slots reserved for the shader (including merged stages)</stat>
+      <stat name="Hash" hash="true">CRC32 hash of code and constant data</stat>
+      <stat name="Instructions" display="Instrs">Instruction count</stat>
+      <stat name="Copies">Copy instructions created for pseudo-instructions</stat>
+      <stat name="Branches">Branch instructions</stat>
+      <stat name="Latency">Issue cycles plus stall cycles</stat>
+      <stat name="Inverse Throughput" display="InvThroughput">Estimated busy cycles to execute one wave</stat>
+      <stat name="VMEM Clause" display="VClause">Number of VMEM clauses (includes 1-sized clauses)</stat>
+      <stat name="SMEM Clause" display="SClause">Number of SMEM clauses (includes 1-sized clauses)</stat>
+      <stat name="Pre-Sched SGPRs" display="PreSGPRs">SGPR usage before scheduling</stat>
+      <stat name="Pre-Sched VGPRs" display="PreVGPRs">VGPR usage before scheduling</stat>
+      <stat name="VALU">Number of VALU instructions</stat>
+      <stat name="SALU">Number of SALU instructions</stat>
+      <stat name="VMEM">Number of VMEM instructions</stat>
+      <stat name="SMEM">Number of SMEM instructions</stat>
+      <stat name="VOPD" more="better">Number of VOPD instructions</stat>
+   </isa>
 </shaderdb>