brw,anv: use XML-based stats

I didn't bother switching either iris or elk/hasvk, but one could. Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37517>
2026-05-05 05:18:08 +02:00 · 2025-09-22 13:00:49 -04:00 · 2025-09-22 13:00:49 -04:00 · c2ae207e80
commit c2ae207e80
parent b575b0954a
13 changed files with 116 additions and 192 deletions
--- a/src/gallium/drivers/iris/iris_program_cache.c
+++ b/src/gallium/drivers/iris/iris_program_cache.c
@ -462,7 +462,7 @@ iris_ensure_indirect_generation_shader(struct iris_batch *batch)

      brw_nir_analyze_ubo_ranges(screen->brw, nir, prog_data->base.ubo_ranges);

-      struct brw_compile_stats stats[3];
+      struct genisa_stats stats[3];
      struct brw_compile_fs_params params = {
         .base = {
            .nir = nir,
--- a/src/intel/compiler/brw_compile_bs.cpp
+++ b/src/intel/compiler/brw_compile_bs.cpp
@ -69,7 +69,7 @@ compile_single_bs(const struct brw_compiler *compiler,
                  struct brw_bs_prog_data *prog_data,
                  nir_shader *shader,
                  brw_generator *g,
-                  struct brw_compile_stats *stats,
+                  struct genisa_stats *stats,
                  int *prog_offset,
                  uint64_t *bsr)
 {
--- a/src/intel/compiler/brw_compile_cs.cpp
+++ b/src/intel/compiler/brw_compile_cs.cpp
@ -291,7 +291,7 @@ brw_compile_cs(const struct brw_compiler *compiler,

   uint32_t max_dispatch_width = 8u << (util_last_bit(prog_data->prog_mask) - 1);

-   struct brw_compile_stats *stats = params->base.stats;
+   struct genisa_stats *stats = params->base.stats;
   for (unsigned simd = 0; simd < 3; simd++) {
      if (prog_data->prog_mask & (1u << simd)) {
         assert(v[simd]);
--- a/src/intel/compiler/brw_compile_fs.cpp
+++ b/src/intel/compiler/brw_compile_fs.cpp
@ -1917,7 +1917,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
                                     nir->info.name));
   }

-   struct brw_compile_stats *stats = params->base.stats;
+   struct genisa_stats *stats = params->base.stats;
   uint32_t max_dispatch_width = 0;

   if (vmulti) {
@ -1947,7 +1947,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
      max_dispatch_width = 32;
   }

-   for (struct brw_compile_stats *s = params->base.stats; s != NULL && s != stats; s++)
+   for (struct genisa_stats *s = params->base.stats; s != NULL && s != stats; s++)
      s->max_dispatch_width = max_dispatch_width;

   g.add_const_data(nir->constant_data, nir->constant_data_size);
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@ -33,6 +33,7 @@
 #include "util/mesa-sha1.h"
 #include "util/enum_operators.h"
 #include "util/ralloc.h"
+#include "util/shader_stats.h"
 #include "util/u_math.h"
 #include "brw_isa_info.h"
 #include "intel_shader_enums.h"
@ -1427,20 +1428,6 @@ DEFINE_PROG_DATA_DOWNCAST(mesh, prog_data->stage == MESA_SHADER_MESH)

 #undef DEFINE_PROG_DATA_DOWNCAST

-struct brw_compile_stats {
-   uint32_t dispatch_width; /**< 0 for vec4 */
-   uint32_t max_polygons;
-   uint32_t max_dispatch_width;
-   uint32_t instructions;
-   uint32_t sends;
-   uint32_t loops;
-   uint32_t cycles;
-   uint32_t spills;
-   uint32_t fills;
-   uint32_t max_live_registers;
-   uint32_t non_ssa_registers_after_nir;
-};
-
 /** @} */

 struct brw_compiler *
@ -1483,7 +1470,7 @@ struct brw_compile_params {

   nir_shader *nir;

-   struct brw_compile_stats *stats;
+   struct genisa_stats *stats;

   void *log_data;

--- a/src/intel/compiler/brw_generator.cpp
+++ b/src/intel/compiler/brw_generator.cpp
@ -733,7 +733,7 @@ brw_generator::enable_debug(const char *shader_name)

 int
 brw_generator::generate_code(const brw_shader &s,
-                             struct brw_compile_stats *stats)
+                             struct genisa_stats *stats)
 {
   const int dispatch_width = s.dispatch_width;
   struct brw_shader_stats shader_stats = s.shader_stats;
@ -1512,15 +1512,30 @@ brw_generator::generate_code(const brw_shader &s,
   if (stats) {
      stats->dispatch_width = dispatch_width;
      stats->max_polygons = s.max_polygons;
-      stats->max_dispatch_width = dispatch_width;
-      stats->instructions = before_size / 16 - nop_count - sync_nop_count;
-      stats->sends = send_count;
-      stats->loops = loop_count;
-      stats->cycles = perf.latency;
-      stats->spills = shader_stats.spill_count;
-      stats->fills = shader_stats.fill_count;
+      stats->instrs = before_size / 16 - nop_count - sync_nop_count;
+      stats->send_messages = send_count;
+      stats->loop_count = loop_count;
+      stats->cycle_count = perf.latency;
+      stats->spill_count = shader_stats.spill_count;
+      stats->fill_count = shader_stats.fill_count;
      stats->max_live_registers = shader_stats.max_register_pressure;
-      stats->non_ssa_registers_after_nir = shader_stats.non_ssa_registers_after_nir;
+      stats->non_ssa_regs_after_nir = shader_stats.non_ssa_registers_after_nir;
+      stats->source_hash = prog_data->source_hash;
+      stats->grf_registers = devinfo->ver >= 30 ? prog_data->grf_used : 0;
+
+      /* Report the max dispatch width only on the smallest SIMD variant.
+       *
+       * XXX: SIMD8 is not the smallest on Xe2. This logic should be adjusted.
+       */
+      if (stage != MESA_SHADER_FRAGMENT || dispatch_width == 8)
+         stats->max_dispatch_width = dispatch_width;
+      else
+         stats->max_dispatch_width = 0;
+
+      if (mesa_shader_stage_uses_workgroup(stage))
+         stats->workgroup_memory_size = prog_data->total_shared;
+      else
+         stats->workgroup_memory_size = 0;
   }

   return start_offset;
--- a/src/intel/compiler/brw_generator.h
+++ b/src/intel/compiler/brw_generator.h
@ -19,7 +19,7 @@ public:

   void enable_debug(const char *shader_name);
   int generate_code(const brw_shader &s,
-                     struct brw_compile_stats *stats);
+                     struct genisa_stats *stats);
   void add_const_data(void *data, unsigned size);
   void add_resume_sbt(unsigned num_resume_shaders, uint64_t *sbt);
   const unsigned *get_assembly();
--- a/src/intel/vulkan/anv_internal_kernels.c
+++ b/src/intel/vulkan/anv_internal_kernels.c
@ -48,6 +48,14 @@ lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
   return true;
 }

+static void
+check_sends(struct genisa_stats *stats, unsigned send_count)
+{
+   assert(stats->spill_count == 0);
+   assert(stats->fill_count == 0);
+   assert(stats->send_messages == send_count);
+}
+
 static struct anv_shader_bin *
 compile_shader(struct anv_device *device,
               enum anv_internal_kernel_name shader_name,
@ -153,7 +161,7 @@ compile_shader(struct anv_device *device,

   const unsigned *program;
   if (stage == MESA_SHADER_FRAGMENT) {
-      struct brw_compile_stats stats[3];
+      struct genisa_stats stats[3];
      struct brw_compile_fs_params params = {
         .base = {
            .nir = nir,
@ -170,28 +178,18 @@ compile_shader(struct anv_device *device,
      if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
         unsigned stat_idx = 0;
         if (prog_data.wm.dispatch_8) {
-            assert(stats[stat_idx].spills == 0);
-            assert(stats[stat_idx].fills == 0);
-            assert(stats[stat_idx].sends == sends_count_expectation);
-            stat_idx++;
+            check_sends(&stats[stat_idx++], sends_count_expectation);
         }
         if (prog_data.wm.dispatch_16) {
-            assert(stats[stat_idx].spills == 0);
-            assert(stats[stat_idx].fills == 0);
-            assert(stats[stat_idx].sends == sends_count_expectation);
-            stat_idx++;
+            check_sends(&stats[stat_idx++], sends_count_expectation);
         }
         if (prog_data.wm.dispatch_32) {
-            assert(stats[stat_idx].spills == 0);
-            assert(stats[stat_idx].fills == 0);
-            assert(stats[stat_idx].sends ==
-                   sends_count_expectation *
-                   (device->info->ver < 20 ? 2 : 1));
-            stat_idx++;
+            check_sends(&stats[stat_idx++], sends_count_expectation *
+                                            (device->info->ver < 20 ? 2 : 1));
         }
      }
   } else {
-      struct brw_compile_stats stats;
+      struct genisa_stats stats;
      struct brw_compile_cs_params params = {
         .base = {
            .nir = nir,
@ -206,9 +204,7 @@ compile_shader(struct anv_device *device,
      program = brw_compile_cs(compiler, &params);

      if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
-         assert(stats.spills == 0);
-         assert(stats.fills == 0);
-         assert(stats.sends == sends_count_expectation);
+         check_sends(&stats, sends_count_expectation);
      }
   }

--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@ -103,7 +103,7 @@ anv_shader_bin_create(struct anv_device *device,
                      const void *kernel_data, uint32_t kernel_size,
                      const struct brw_stage_prog_data *prog_data_in,
                      uint32_t prog_data_size,
-                      const struct brw_compile_stats *stats, uint32_t num_stats,
+                      const struct genisa_stats *stats, uint32_t num_stats,
                      const nir_xfb_info *xfb_info_in,
                      const struct anv_pipeline_bind_map *bind_map,
                      const struct anv_push_descriptor_info *push_desc_info)
@ -381,7 +381,7 @@ anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,

   void *mem_ctx = ralloc_context(NULL);
   uint32_t num_stats = blob_read_uint32(blob);
-   const struct brw_compile_stats *stats =
+   const struct genisa_stats *stats =
      blob_read_bytes(blob, num_stats * sizeof(stats[0]));

   const nir_xfb_info *xfb_info = NULL;
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@ -1225,7 +1225,7 @@ struct anv_shader {

   const struct brw_stage_prog_data *prog_data;

-   struct brw_compile_stats stats[3];
+   struct genisa_stats stats[3];
   uint32_t num_stats;

   char *nir_str;
@ -5097,7 +5097,7 @@ struct anv_shader_upload_params {
   const struct brw_stage_prog_data *prog_data;
   uint32_t prog_data_size;

-   const struct brw_compile_stats *stats;
+   const struct genisa_stats *stats;
   uint32_t num_stats;

   const struct nir_xfb_info *xfb_info;
@ -5145,7 +5145,7 @@ struct anv_shader_bin {
   const struct brw_stage_prog_data *prog_data;
   uint32_t prog_data_size;

-   struct brw_compile_stats stats[3];
+   struct genisa_stats stats[3];
   uint32_t num_stats;

   struct nir_xfb_info *xfb_info;
@ -5178,7 +5178,7 @@ anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
 struct anv_pipeline_executable {
   mesa_shader_stage stage;

-   struct brw_compile_stats stats;
+   struct genisa_stats stats;

   char *nir;
   char *disasm;
--- a/src/intel/vulkan/anv_shader.c
+++ b/src/intel/vulkan/anv_shader.c
@ -8,6 +8,7 @@
 #include "nir/nir_serialize.h"

 #include "compiler/brw_disasm.h"
+#include "util/shader_stats.h"

 static void
 anv_shader_destroy(struct vk_device *vk_device,
@ -174,7 +175,7 @@ anv_shader_get_executable_properties(struct vk_device *device,
      container_of(vk_shader, struct anv_shader, vk);

   for (uint32_t i = 0; i < shader->num_stats; i++) {
-      const struct brw_compile_stats *stats = &shader->stats[i];
+      const struct genisa_stats *stats = &shader->stats[i];

      vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
         mesa_shader_stage stage = vk_shader->stage;
@ -219,145 +220,11 @@ anv_shader_get_executable_statistics(struct vk_device *vk_device,
 {
   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
                          statistics, statistic_count);
-   struct anv_device *device =
-      container_of(vk_device, struct anv_device, vk);
   struct anv_shader *shader =
      container_of(vk_shader, struct anv_shader, vk);

   assert(executable_index < shader->num_stats);
-
-   const struct brw_compile_stats *stats = &shader->stats[executable_index];
-   const struct brw_stage_prog_data *prog_data = shader->prog_data;
-
-   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
-      VK_COPY_STR(stat->name, "Instruction Count");
-      VK_COPY_STR(stat->description,
-                  "Number of GEN instructions in the final generated "
-                  "shader executable.");
-      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
-      stat->value.u64 = stats->instructions;
-   }
-
-   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
-      VK_COPY_STR(stat->name, "SEND Count");
-      VK_COPY_STR(stat->description,
-                  "Number of instructions in the final generated shader "
-                  "executable which access external units such as the "
-                  "constant cache or the sampler.");
-      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
-      stat->value.u64 = stats->sends;
-   }
-
-   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
-      VK_COPY_STR(stat->name, "Loop Count");
-      VK_COPY_STR(stat->description,
-                  "Number of loops (not unrolled) in the final generated "
-                  "shader executable.");
-      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
-      stat->value.u64 = stats->loops;
-   }
-
-   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
-      VK_COPY_STR(stat->name, "Cycle Count");
-      VK_COPY_STR(stat->description,
-                  "Estimate of the number of EU cycles required to execute "
-                  "the final generated executable.  This is an estimate only "
-                  "and may vary greatly from actual run-time performance.");
-      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
-      stat->value.u64 = stats->cycles;
-   }
-
-   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
-      VK_COPY_STR(stat->name, "Spill Count");
-      VK_COPY_STR(stat->description,
-                  "Number of scratch spill operations.  This gives a rough "
-                  "estimate of the cost incurred due to spilling temporary "
-                  "values to memory.  If this is non-zero, you may want to "
-                  "adjust your shader to reduce register pressure.");
-      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
-      stat->value.u64 = stats->spills;
-   }
-
-   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
-      VK_COPY_STR(stat->name, "Fill Count");
-      VK_COPY_STR(stat->description,
-                  "Number of scratch fill operations.  This gives a rough "
-                  "estimate of the cost incurred due to spilling temporary "
-                  "values to memory.  If this is non-zero, you may want to "
-                  "adjust your shader to reduce register pressure.");
-      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
-      stat->value.u64 = stats->fills;
-   }
-
-   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
-      VK_COPY_STR(stat->name, "Scratch Memory Size");
-      VK_COPY_STR(stat->description,
-                  "Number of bytes of scratch memory required by the "
-                  "generated shader executable.  If this is non-zero, you "
-                  "may want to adjust your shader to reduce register "
-                  "pressure.");
-      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
-      stat->value.u64 = prog_data->total_scratch;
-   }
-
-   if (device->info->ver >= 30) {
-      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
-         VK_COPY_STR(stat->name, "GRF registers");
-         VK_COPY_STR(stat->description,
-                     "Number of GRF registers required by the shader.");
-         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
-         stat->value.u64 = prog_data->grf_used;
-      }
-   }
-
-   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
-      VK_COPY_STR(stat->name, "Max dispatch width");
-      VK_COPY_STR(stat->description,
-                  "Largest SIMD dispatch width.");
-      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
-      /* Report the max dispatch width only on the smallest SIMD variant */
-      if (vk_shader->stage != MESA_SHADER_FRAGMENT || stats->dispatch_width == 8)
-         stat->value.u64 = stats->max_dispatch_width;
-      else
-         stat->value.u64 = 0;
-   }
-
-   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
-      VK_COPY_STR(stat->name, "Max live registers");
-      VK_COPY_STR(stat->description,
-                  "Maximum number of registers used across the entire shader.");
-      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
-      stat->value.u64 = stats->max_live_registers;
-   }
-
-   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
-      VK_COPY_STR(stat->name, "Workgroup Memory Size");
-      VK_COPY_STR(stat->description,
-                  "Number of bytes of workgroup shared memory used by this "
-                  "shader including any padding.");
-      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
-      if (mesa_shader_stage_uses_workgroup(vk_shader->stage))
-         stat->value.u64 = prog_data->total_shared;
-      else
-         stat->value.u64 = 0;
-   }
-
-   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
-      VK_COPY_STR(stat->name, "Source hash");
-      VK_PRINT_STR(stat->description,
-                   "hash = 0x%08x. Hash generated from shader source.",
-                   prog_data->source_hash);
-      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
-      stat->value.u64 = prog_data->source_hash;
-   }
-
-   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
-      VK_COPY_STR(stat->name, "Non SSA regs after NIR");
-      VK_COPY_STR(stat->description, "Non SSA regs after NIR translation to BRW.");
-      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
-      stat->value.u64 = stats->non_ssa_registers_after_nir;
-   }
-
+   vk_add_genisa_stats(out, &shader->stats[executable_index]);
   return VK_SUCCESS;
 }

--- a/src/intel/vulkan/anv_shader.h
+++ b/src/intel/vulkan/anv_shader.h
@ -93,7 +93,7 @@ struct anv_shader_data {
   const nir_xfb_info *xfb_info;

   uint32_t num_stats;
-   struct brw_compile_stats stats[3];
+   struct genisa_stats stats[3];
   char *disasm[3];

   bool use_primitive_replication;
--- a/src/util/shader_stats.xml
+++ b/src/util/shader_stats.xml
@ -121,4 +121,63 @@
      <stat name="SMEM">Number of SMEM instructions</stat>
      <stat name="VOPD" more="better">Number of VOPD instructions</stat>
   </isa>
+
+   <family name="Intel">
+      <isa name="GenISA">
+         <stat name="Dispatch width" hidden="true">0 for vec4</stat>
+         <stat name="Max polygons" hidden="true"/>
+         <stat name="Instruction Count" display="Instrs">
+            Number of GEN instructions in the final generated shader executable.
+         </stat>
+         <stat name="SEND Count" display="Send messages">
+            Number of instructions in the final generated shader executable
+            which access external units such as the constant cache or the sampler.
+         </stat>
+         <stat name="Loop Count" display="Loop count">
+            Number of loops (not unrolled) in the final generated shader
+            executable.
+         </stat>
+         <stat name="Cycle Count" display="Cycle count">
+            Estimate of the number of EU cycles required to execute the final
+            generated executable. This is an estimate only and may vary greatly
+            from actual run-time performance.
+         </stat>
+         <stat name="Spill Count" display="Spill count">
+            Number of scratch spill operations. This gives a rough estimate of
+            the cost incurred due to spilling temporary values to memory. If
+            this is non-zero, you may want to adjust your shader to reduce
+            register pressure.
+         </stat>
+         <stat name="Fill Count" display="Fill count">
+            Number of scratch fill operations. This gives a rough estimate of
+            the cost incurred due to spilling temporary values to memory. If
+            this is non-zero, you may want to adjust your shader to reduce
+            register pressure.
+         </stat>
+         <stat name="Scratch Memory Size">
+            Number of bytes of scratch memory required by the generated shader
+            executable. If this is non-zero, you may want to adjust your shader
+            to reduce register pressure.
+         </stat>
+         <stat name="GRF registers">
+            Number of GRF registers required by the shader.
+         </stat>
+         <stat name="Max dispatch width" more="better">
+            Largest SIMD dispatch width.
+         </stat>
+         <stat name="Max live registers">
+            Maximum number of registers used across the entire shader.
+         </stat>
+         <stat name="Workgroup Memory Size">
+            Number of bytes of workgroup shared memory used by this shader
+            including any padding.
+         </stat>
+         <stat name="Non SSA regs after NIR">
+            Non SSA regs after NIR translation to BRW.
+         </stat>
+         <stat name="Source hash" hash="true">
+            Hash generated from shader source.
+         </stat>
+      </isa>
+   </family>
 </shaderdb>