diff --git a/src/gallium/drivers/panfrost/pan_shader.c b/src/gallium/drivers/panfrost/pan_shader.c
index f0ad06fee4c..e762c8437cf 100644
--- a/src/gallium/drivers/panfrost/pan_shader.c
+++ b/src/gallium/drivers/panfrost/pan_shader.c
@@ -36,6 +36,7 @@
 #include "nir_serialize.h"
 #include "pan_bo.h"
 #include "pan_context.h"
+#include "shader_enums.h"
 
 static struct panfrost_uncompiled_shader *
 panfrost_alloc_shader(const nir_shader *nir)
@@ -128,7 +129,6 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
       pan_shader_preprocess(s, panfrost_device_gpu_id(dev));
 
    struct panfrost_compile_inputs inputs = {
-      .debug = dbg,
       .gpu_id = panfrost_device_gpu_id(dev),
    };
 
@@ -201,6 +201,16 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
 
    screen->vtbl.compile_shader(s, &inputs, &out->binary, &out->info);
 
+   panfrost_stats_util_debug(dbg, gl_shader_stage_name(s->info.stage),
+                             &out->info.stats);
+
+   /* IDVS vertex shaders carry a second set of stats, gathered from the
+    * varying variant. Report it under a label that says so. */
+   if (s->info.stage == MESA_SHADER_VERTEX && out->info.vs.idvs) {
+      panfrost_stats_util_debug(dbg, "MESA_SHADER_VARYING",
+                                &out->info.stats_idvs_varying);
+   }
+
    assert(req_local_mem >= out->info.wls_size);
    out->info.wls_size = req_local_mem;
 
diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c
index ce2738bd373..4d8eb9e3b95 100644
--- a/src/panfrost/compiler/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost_compile.c
@@ -28,6 +28,7 @@
 #include "compiler/glsl/glsl_to_nir.h"
 #include "compiler/glsl_types.h"
 #include "compiler/nir/nir_builder.h"
+#include "panfrost/util/pan_ir.h"
 #include "util/u_debug.h"
 
 #include "bifrost/disassemble.h"
@@ -4737,10 +4738,10 @@ bi_shader_stage_name(bi_context *ctx)
       return gl_shader_stage_name(ctx->stage);
 }
 
-static char *
-bi_print_stats(bi_context *ctx, unsigned size)
+static void
+bi_gather_stats(bi_context *ctx, unsigned size, struct bifrost_stats *out)
 {
-   struct bi_stats stats = {0};
+   struct bi_stats counts = {0};
 
    /* Count instructions, clauses, and tuples. Also attempt to construct
     * normalized execution engine cycle counts, using the following ratio:
@@ -4756,57 +4757,46 @@ bi_print_stats(bi_context *ctx, unsigned size)
 
    bi_foreach_block(ctx, block) {
       bi_foreach_clause_in_block(block, clause) {
-         stats.nr_clauses++;
-         stats.nr_tuples += clause->tuple_count;
+         counts.nr_clauses++;
+         counts.nr_tuples += clause->tuple_count;
 
          for (unsigned i = 0; i < clause->tuple_count; ++i)
-            bi_count_tuple_stats(clause, &clause->tuples[i], &stats);
+            bi_count_tuple_stats(clause, &clause->tuples[i], &counts);
       }
    }
 
-   float cycles_arith = ((float)stats.nr_arith) / 24.0;
-   float cycles_texture = ((float)stats.nr_texture) / 2.0;
-   float cycles_varying = ((float)stats.nr_varying) / 16.0;
-   float cycles_ldst = ((float)stats.nr_ldst) / 1.0;
-
-   float cycles_message = MAX3(cycles_texture, cycles_varying, cycles_ldst);
-   float cycles_bound = MAX2(cycles_arith, cycles_message);
-
    /* Thread count and register pressure are traded off only on v7 */
    bool full_threads = (ctx->arch == 7 && ctx->info.work_reg_count <= 32);
-   unsigned nr_threads = full_threads ? 2 : 1;
 
-   /* Dump stats */
-   char *str = ralloc_asprintf(
-      NULL,
-      "%s shader: "
-      "%u inst, %u tuples, %u clauses, "
-      "%f cycles, %f arith, %f texture, %f vary, %f ldst, "
-      "%u quadwords, %u threads",
-      bi_shader_stage_name(ctx), stats.nr_ins, stats.nr_tuples,
-      stats.nr_clauses, cycles_bound, cycles_arith, cycles_texture,
-      cycles_varying, cycles_ldst, size / 16, nr_threads);
+   *out = (struct bifrost_stats){
+      .instrs = counts.nr_ins,
+      .tuples = counts.nr_tuples,
+      .clauses = counts.nr_clauses,
+      .arith = ((float)counts.nr_arith) / 24.0,
+      .t = ((float)counts.nr_texture) / 2.0,
+      .v = ((float)counts.nr_varying) / 16.0,
+      .ldst = ((float)counts.nr_ldst) / 1.0,
+      .code_size = size,
+      .preloads = ctx->arch == 7 ? bi_count_preload_cost(ctx) : 0,
+      .threads = full_threads ? 2 : 1,
+      .loops = ctx->loop_count,
+      .spills = ctx->spills,
+      .fills = ctx->fills,
+   };
 
-   if (ctx->arch == 7) {
-      ralloc_asprintf_append(&str, ", %u preloads", bi_count_preload_cost(ctx));
-   }
-
-   ralloc_asprintf_append(&str, ", %u loops, %u:%u spills:fills",
-                          ctx->loop_count, ctx->spills, ctx->fills);
-
-   return str;
+   out->cycles = MAX2(out->arith, MAX3(out->t, out->v, out->ldst));
 }
 
-static char *
-va_print_stats(bi_context *ctx, unsigned size)
+static void
+va_gather_stats(bi_context *ctx, unsigned size, struct valhall_stats *out)
 {
    unsigned nr_ins = 0;
-   struct va_stats stats = {0};
+   struct va_stats counts = {0};
 
    /* Count instructions */
    bi_foreach_instr_global(ctx, I) {
       nr_ins++;
-      va_count_instr_stats(I, &stats);
+      va_count_instr_stats(I, &counts);
    }
 
    /* Mali G78 peak performance:
@@ -4818,31 +4808,24 @@ va_print_stats(bi_context *ctx, unsigned size)
     * 4 texture instructions per cycle
     * 1 load/store operation per cycle
     */
-
-   float cycles_fma = ((float)stats.fma) / 64.0;
-   float cycles_cvt = ((float)stats.cvt) / 64.0;
-   float cycles_sfu = ((float)stats.sfu) / 16.0;
-   float cycles_v = ((float)stats.v) / 16.0;
-   float cycles_t = ((float)stats.t) / 4.0;
-   float cycles_ls = ((float)stats.ls) / 1.0;
+   *out = (struct valhall_stats){
+      .instrs = nr_ins,
+      .code_size = size,
+      .fma = ((float)counts.fma) / 64.0,
+      .cvt = ((float)counts.cvt) / 64.0,
+      .sfu = ((float)counts.sfu) / 16.0,
+      .v = ((float)counts.v) / 16.0,
+      .t = ((float)counts.t) / 4.0,
+      .ls = ((float)counts.ls) / 1.0,
+      .threads = (ctx->info.work_reg_count <= 32) ? 2 : 1,
+      .loops = ctx->loop_count,
+      .spills = ctx->spills,
+      .fills = ctx->fills,
+   };
 
    /* Calculate the bound */
-   float cycles = MAX2(MAX3(cycles_fma, cycles_cvt, cycles_sfu),
-                       MAX3(cycles_v, cycles_t, cycles_ls));
-
-   /* Thread count and register pressure are traded off */
-   unsigned nr_threads = (ctx->info.work_reg_count <= 32) ? 2 : 1;
-
-   /* Dump stats */
-   return ralloc_asprintf(NULL,
-                          "%s shader: "
-                          "%u inst, %f cycles, %f fma, %f cvt, %f sfu, %f v, "
-                          "%f t, %f ls, %u quadwords, %u threads, %u loops, "
-                          "%u:%u spills:fills",
-                          bi_shader_stage_name(ctx), nr_ins, cycles, cycles_fma,
-                          cycles_cvt, cycles_sfu, cycles_v, cycles_t, cycles_ls,
-                          size / 16, nr_threads, ctx->loop_count, ctx->spills,
-                          ctx->fills);
+   out->cycles =
+      MAX2(MAX3(out->fma, out->cvt, out->sfu), MAX3(out->v, out->t, out->ls));
 }
 
 static int
@@ -5748,7 +5731,7 @@ static bi_context *
 bi_compile_variant_nir(nir_shader *nir,
                        const struct panfrost_compile_inputs *inputs,
                        struct util_dynarray *binary, struct bi_shader_info info,
-                       enum bi_idvs_mode idvs)
+                       struct panfrost_stats *stats, enum bi_idvs_mode idvs)
 {
    bi_context *ctx = rzalloc(NULL, bi_context);
 
@@ -5985,23 +5968,17 @@ bi_compile_variant_nir(nir_shader *nir,
       fflush(stdout);
    }
 
-   if (!skip_internal &&
-       ((bifrost_debug & BIFROST_DBG_SHADERDB) || inputs->debug)) {
-      char *shaderdb;
+   if (ctx->arch >= 9) {
+      stats->isa = PANFROST_STAT_VALHALL;
+      va_gather_stats(ctx, binary->size - offset, &stats->valhall);
+   } else {
+      stats->isa = PANFROST_STAT_BIFROST;
+      bi_gather_stats(ctx, binary->size - offset, &stats->bifrost);
+   }
 
-      if (ctx->arch >= 9) {
-         shaderdb = va_print_stats(ctx, binary->size - offset);
-      } else {
-         shaderdb = bi_print_stats(ctx, binary->size - offset);
-      }
-
-      if (bifrost_debug & BIFROST_DBG_SHADERDB)
-         fprintf(stderr, "SHADER-DB: %s\n", shaderdb);
-
-      if (inputs->debug)
-         util_debug_message(inputs->debug, SHADER_INFO, "%s", shaderdb);
-
-      ralloc_free(shaderdb);
+   if ((bifrost_debug & BIFROST_DBG_SHADERDB) && !skip_internal) {
+      const char *prefix = bi_shader_stage_name(ctx);
+      panfrost_stats_fprintf(stderr, prefix, stats);
    }
 
    return ctx;
@@ -6034,8 +6011,11 @@ bi_compile_variant(nir_shader *nir,
     * offset, to keep the ABI simple. */
    assert((offset == 0) ^ (idvs == BI_IDVS_VARYING));
 
+   struct panfrost_stats *stats =
+      idvs == BI_IDVS_VARYING ? &info->stats_idvs_varying : &info->stats;
+
    bi_context *ctx =
-      bi_compile_variant_nir(nir, inputs, binary, local_info, idvs);
+      bi_compile_variant_nir(nir, inputs, binary, local_info, stats, idvs);
 
    /* A register is preloaded <==> it is live before the first block */
    bi_block *first_block = list_first_entry(&ctx->blocks, bi_block, link);
diff --git a/src/panfrost/compiler/compiler.h b/src/panfrost/compiler/compiler.h
index 672699add68..f27c7632ccf 100644
--- a/src/panfrost/compiler/compiler.h
+++ b/src/panfrost/compiler/compiler.h
@@ -30,6 +30,7 @@
 #include "compiler/nir/nir.h"
 #include "panfrost/util/pan_ir.h"
 #include "util/half_float.h"
+#include "util/shader_stats.h"
 #include "util/u_math.h"
 #include "util/u_worklist.h"
 #include "bi_opcodes.h"
@@ -834,6 +835,7 @@ bi_block_add_successor(bi_block *block, bi_block *successor)
 struct bi_shader_info {
    struct panfrost_ubo_push *push;
    struct bifrost_shader_info *bifrost;
+   struct panfrost_stats stats;
    unsigned tls_size;
    unsigned work_reg_count;
    unsigned push_offset;
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index c045b002c6c..39c50038c88 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -36,6 +36,7 @@
 #include "compiler/nir/nir_builder.h"
 #include "util/half_float.h"
 #include "util/list.h"
+#include "util/shader_stats.h"
 #include "util/u_debug.h"
 #include "util/u_dynarray.h"
 #include "util/u_math.h"
@@ -49,6 +50,7 @@
 #include "midgard_quirks.h"
 
 #include "disassemble.h"
+#include "shader_enums.h"
 
 static const struct debug_named_value midgard_debug_options[] = {
    {"shaders", MIDGARD_DBG_SHADERS, "Dump shaders in NIR and MIR"},
@@ -3155,51 +3157,34 @@ midgard_compile_shader_nir(nir_shader *nir,
    if (binary->size)
       memset(util_dynarray_grow(binary, uint8_t, 16), 0, 16);
 
-   if ((midgard_debug & MIDGARD_DBG_SHADERDB || inputs->debug) &&
-       !nir->info.internal) {
-      unsigned nr_bundles = 0, nr_ins = 0;
+   struct midgard_stats stats = {
+      .quadwords = ctx->quadword_count,
+      .registers = info->work_reg_count,
+      .loops = ctx->loop_count,
+      .spills = ctx->spills,
+      .fills = ctx->fills,
+   };
 
-      /* Count instructions and bundles */
+   /* Count instructions and bundles */
+   mir_foreach_block(ctx, _block) {
+      midgard_block *block = (midgard_block *)_block;
+      stats.bundles +=
+         util_dynarray_num_elements(&block->bundles, midgard_bundle);
 
-      mir_foreach_block(ctx, _block) {
-         midgard_block *block = (midgard_block *)_block;
-         nr_bundles +=
-            util_dynarray_num_elements(&block->bundles, midgard_bundle);
+      mir_foreach_bundle_in_block(block, bun)
+         stats.inst += bun->instruction_count;
+   }
 
-         mir_foreach_bundle_in_block(block, bun)
-            nr_ins += bun->instruction_count;
-      }
+   /* Calculate thread count. Fewer work registers allow more threads in
+    * flight: up to 4 registers -> 4, up to 8 -> 2, otherwise 1 */
+   stats.threads = (stats.registers <= 4) ? 4 : (stats.registers <= 8) ? 2 : 1;
 
-      /* Calculate thread count. There are certain cutoffs by
-       * register count for thread count */
+   info->stats.isa = PANFROST_STAT_MIDGARD;
+   info->stats.midgard = stats;
 
-      unsigned nr_registers = info->work_reg_count;
-
-      unsigned nr_threads = (nr_registers <= 4)   ? 4
-                            : (nr_registers <= 8) ? 2
-                                                  : 1;
-
-      char *shaderdb = NULL;
-
-      /* Dump stats */
-
-      asprintf(&shaderdb,
-               "%s shader: "
-               "%u inst, %u bundles, %u quadwords, "
-               "%u registers, %u threads, %u loops, "
-               "%u:%u spills:fills",
-               ctx->inputs->is_blend ? "PAN_SHADER_BLEND"
-                                     : gl_shader_stage_name(ctx->stage),
-               nr_ins, nr_bundles, ctx->quadword_count, nr_registers,
-               nr_threads, ctx->loop_count, ctx->spills, ctx->fills);
-
-      if (midgard_debug & MIDGARD_DBG_SHADERDB)
-         fprintf(stderr, "SHADER-DB: %s\n", shaderdb);
-
-      if (inputs->debug)
-         util_debug_message(inputs->debug, SHADER_INFO, "%s", shaderdb);
-
-      free(shaderdb);
+   if ((midgard_debug & MIDGARD_DBG_SHADERDB) && !nir->info.internal) {
+      const char *prefix = _mesa_shader_stage_to_abbrev(ctx->stage);
+      midgard_stats_fprintf(stderr, prefix, &stats);
    }
 
    _mesa_hash_table_u64_destroy(ctx->ssa_constants);
diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h
index 0459c45af20..7920d0e072e 100644
--- a/src/panfrost/util/pan_ir.h
+++ b/src/panfrost/util/pan_ir.h
@@ -27,6 +27,7 @@
 #include <stdint.h>
 #include "compiler/nir/nir.h"
 #include "util/hash_table.h"
+#include "util/shader_stats.h"
 #include "util/u_dynarray.h"
 
 /* Indices for named (non-XFB) varyings that are present. These are packed
@@ -95,8 +96,6 @@ unsigned pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo,
                                unsigned offs);
 
 struct panfrost_compile_inputs {
-   struct util_debug_callback *debug;
-
    unsigned gpu_id;
    bool is_blend, is_blit;
    struct {
@@ -196,6 +195,8 @@ struct pan_shader_info {
    unsigned tls_size;
    unsigned wls_size;
 
+   struct panfrost_stats stats, stats_idvs_varying;
+
    /* Bit mask of preloaded registers */
    uint64_t preload;
 
diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c
index 55ad829114e..00d5982c3d3 100644
--- a/src/panfrost/vulkan/panvk_vX_shader.c
+++ b/src/panfrost/vulkan/panvk_vX_shader.c
@@ -41,11 +41,13 @@
 #include "spirv/nir_spirv.h"
 #include "util/memstream.h"
 #include "util/mesa-sha1.h"
+#include "util/shader_stats.h"
 #include "util/u_dynarray.h"
 #include "nir_builder.h"
 #include "nir_conversion_builder.h"
 #include "nir_deref.h"
 
+#include "shader_enums.h"
 #include "vk_graphics_state.h"
 #include "vk_nir_convert_ycbcr.h"
 #include "vk_shader_module.h"
@@ -1416,13 +1418,6 @@ panvk_shader_serialize(struct vk_device *vk_dev,
    return !blob->out_of_memory;
 }
 
-#define WRITE_STR(field, ...)                                                  \
-   ({                                                                          \
-      memset(field, 0, sizeof(field));                                         \
-      UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__);              \
-      assert(i > 0 && i < sizeof(field));                                      \
-   })
-
 static VkResult
 panvk_shader_get_executable_properties(
    UNUSED struct vk_device *device, const struct vk_shader *vk_shader,
@@ -1438,10 +1433,20 @@ panvk_shader_get_executable_properties(
    {
       props->stages = mesa_to_vk_shader_stage(shader->info.stage);
       props->subgroupSize = 8;
-      WRITE_STR(props->name, "%s",
-                _mesa_shader_stage_to_string(shader->info.stage));
-      WRITE_STR(props->description, "%s shader",
-                _mesa_shader_stage_to_string(shader->info.stage));
+      VK_COPY_STR(props->name,
+                  _mesa_shader_stage_to_string(shader->info.stage));
+      VK_PRINT_STR(props->description, "%s shader",
+                   _mesa_shader_stage_to_string(shader->info.stage));
+   }
+
+   if (shader->info.stage == MESA_SHADER_VERTEX && shader->info.vs.idvs) {
+      vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props)
+      {
+         props->stages = mesa_to_vk_shader_stage(shader->info.stage);
+         props->subgroupSize = 8;
+         VK_COPY_STR(props->name, "varying");
+         VK_COPY_STR(props->description, "Varying shader");
+      }
    }
 
    return vk_outarray_status(&out);
@@ -1459,19 +1464,11 @@ panvk_shader_get_executable_statistics(
    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out, statistics,
                           statistic_count);
 
-   assert(executable_index == 0);
-
-   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat)
-   {
-      WRITE_STR(stat->name, "Code Size");
-      WRITE_STR(stat->description,
-                "Size of the compiled shader binary, in bytes");
-      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
-      stat->value.u64 = shader->bin_size;
-   }
-
-   /* TODO: more executable statistics (VK_KHR_pipeline_executable_properties) */
+   assert(executable_index == 0 || executable_index == 1);
+   struct panfrost_stats *stats =
+      executable_index ? &shader->info.stats_idvs_varying : &shader->info.stats;
 
+   vk_add_panfrost_stats(out, stats);
    return vk_outarray_status(&out);
 }
 
@@ -1513,8 +1510,8 @@ panvk_shader_get_executable_internal_representations(
       vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
                                &out, ir)
       {
-         WRITE_STR(ir->name, "NIR shader");
-         WRITE_STR(ir->description,
+         VK_COPY_STR(ir->name, "NIR shader");
+         VK_COPY_STR(ir->description,
                    "NIR shader before sending to the back-end compiler");
          if (!write_ir_text(ir, shader->nir_str))
             incomplete_text = true;
@@ -1525,8 +1522,8 @@ panvk_shader_get_executable_internal_representations(
       vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
                                &out, ir)
       {
-         WRITE_STR(ir->name, "Assembly");
-         WRITE_STR(ir->description, "Final Assembly");
+         VK_COPY_STR(ir->name, "Assembly");
+         VK_COPY_STR(ir->description, "Final Assembly");
          if (!write_ir_text(ir, shader->asm_str))
             incomplete_text = true;
       }
diff --git a/src/util/shader_stats.xml b/src/util/shader_stats.xml
index 89b37e1e3fa..6a3c84eb742 100644
--- a/src/util/shader_stats.xml
+++ b/src/util/shader_stats.xml
@@ -13,4 +13,50 @@
       <stat name="Spills">Number of spill (stack store) instructions</stat>
       <stat name="Fills">Number of fill (stack load) instructions</stat>
     </isa>
+
+   <family name="Panfrost">
+     <isa name="Midgard">
+        <stat name="Instructions" display="Inst">Instruction count</stat>
+        <stat name="Bundles">Instruction bundles</stat>
+        <stat name="Registers" type="u16">Register usage in vec4s</stat>
+        <stat name="Threads" more="better" type="u16">Maximum number of threads in flight on a compute unit</stat>
+        <stat name="Quadwords">Binary size in quadwords</stat>
+        <stat name="Loops">Number of hardware loops</stat>
+        <stat name="Spills">Number of spill instructions</stat>
+        <stat name="Fills">Number of fill instructions</stat>
+     </isa>
+
+     <isa name="Bifrost">
+        <stat name="Instructions" display="Instrs">Instruction count</stat>
+        <stat name="Tuples">Tuple count</stat>
+        <stat name="Clauses">Clause count</stat>
+        <stat name="Cycles" type="float">Estimated normalized cycles</stat>
+        <stat name="Arithmetic" display="Arith" type="float">Estimated normalized arithmetic cycles</stat>
+        <stat name="Texture" display="T" type="float">Estimated normalized Texture cycles</stat>
+        <stat name="Load/store" display="LDST" type="float">Estimated normalized Load/Store cycles</stat>
+        <stat name="Varying" display="V" type="float">Estimated normalized Varying cycles</stat>
+        <stat name="Preloads" type="u16">Preload count</stat>
+        <stat name="Threads" more="better" type="u16">Maximum number of threads in flight on a compute unit</stat>
+        <stat name="Code size">Binary size in bytes</stat>
+        <stat name="Loops">Number of hardware loops</stat>
+        <stat name="Spills">Number of spill instructions</stat>
+        <stat name="Fills">Number of fill instructions</stat>
+     </isa>
+
+     <isa name="Valhall">
+        <stat name="Instructions" display="Instrs">Instruction count</stat>
+        <stat name="Cycles" type="float">Estimated normalized cycles</stat>
+        <stat name="FMA" type="float">Estimated normalized FMA (Fused Multiply-Add) cycles</stat>
+        <stat name="CVT" type="float">Estimated normalized CVT (ConVerT) cycles</stat>
+        <stat name="SFU" type="float">Estimated normalized SFU (Special Function Unit) cycles</stat>
+        <stat name="Varying" display="V" type="float">Estimated normalized Varying cycles</stat>
+        <stat name="Texture" display="T" type="float">Estimated normalized Texture cycles</stat>
+        <stat name="Load/store" display="LS" type="float">Estimated normalized Load/Store cycles</stat>
+        <stat name="Code size">Binary size in bytes</stat>
+        <stat name="Threads" more="better" type="u16">Maximum number of threads in flight on a compute unit</stat>
+        <stat name="Loops">Number of hardware loops</stat>
+        <stat name="Spills">Number of spill instructions</stat>
+        <stat name="Fills">Number of fill instructions</stat>
+     </isa>
+   </family>
 </shaderdb>