panfrost/panvk: Add size calculations to compiler register code

This helps us to more accurately count the number of registers that
need to be spilled to keep us below the maximum.

Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37188>
This commit is contained in:
Eric R. Smith 2025-09-03 12:42:08 +00:00 committed by Marge Bot
parent 43d9765e35
commit 5e380ca7e5
2 changed files with 53 additions and 9 deletions

View file

@ -49,10 +49,22 @@ bi_calc_register_demand(bi_context *ctx)
unsigned v = I->dest[d].value;
assert(widths[v] == 0 && "broken SSA");
/* Round up vectors for easier live range splitting */
widths[v] = 1;
widths[v] = bi_count_write_registers(I, d);
classes[v] = ra_class_for_index(I->dest[d]);
}
}
/* now that we know the rest of the sizes, find the sizes for PHI nodes */
bi_foreach_block(ctx, block) {
bi_foreach_phi_in_block(block, I) {
if (I->dest[0].type != BI_INDEX_NORMAL)
continue;
unsigned idx = I->dest[0].value;
widths[idx] = 1;
bi_foreach_ssa_src(I, s) {
widths[idx] = MAX2(widths[idx], widths[I->src[s].value]);
}
}
}
/* Calculate demand at the start of each block based on live-in, then update
* for each instruction processed. Calculate rolling maximum.

View file

@ -179,6 +179,9 @@ struct spill_ctx {
*/
BITSET_WORD *S;
/* Widths of vectors */
uint32_t *size;
/* Mapping of rematerializable values to their definitions, or NULL for nodes
* that are not rematerializable.
*/
@ -219,12 +222,11 @@ spill_block(struct spill_ctx *ctx, bi_block *block)
/* Calculate the register demand of a node. This should be rounded up to
* a power-of-two to match the equivalent calculations in RA.
* For now just punt and return 1, but we'll want to revisit this later.
*/
static inline unsigned
node_size(struct spill_ctx *ctx, unsigned node)
{
return 1;
return ctx->size[node];
}
/*
@ -461,7 +463,8 @@ cmp_dist(const void *left_, const void *right_, void *ctx_)
struct spill_ctx *ctx = ctx_;
const struct candidate *left = left_;
const struct candidate *right = right_;
unsigned ldist = left->dist;
unsigned rdist = right->dist;
/* We assume that rematerializing - even before every instruction - is
* cheaper than spilling. As long as one of the nodes is rematerializable
* (with distance > 0), we choose it over spilling. Within a class of nodes
@ -469,13 +472,13 @@ cmp_dist(const void *left_, const void *right_, void *ctx_)
*/
assert(left->node < ctx->n_alloc);
assert(right->node < ctx->n_alloc);
bool remat_left = ctx->remat[left->node] != NULL && left->dist > 0;
bool remat_right = ctx->remat[right->node] != NULL && right->dist > 0;
bool remat_left = ctx->remat[left->node] != NULL && ldist > 0;
bool remat_right = ctx->remat[right->node] != NULL && rdist > 0;
if (remat_left != remat_right)
return remat_left ? 1 : -1;
else
return (left->dist > right->dist) - (left->dist < right->dist);
return (ldist > rdist) - (ldist < rdist);
}
/*
@ -1293,13 +1296,40 @@ bi_spill_ssa(bi_context *ctx, unsigned k, unsigned spill_base)
dist_t *next_uses = rzalloc_array(memctx, dist_t, ctx->ssa_alloc + max_temps);
bi_instr **remat = rzalloc_array(memctx, bi_instr *, ctx->ssa_alloc + max_temps);
uint32_t *sizes = rzalloc_array(memctx, uint32_t, ctx->ssa_alloc + max_temps);
/* now record instructions that can be easily re-materialized */
/* while we're at it, calculate sizes too */
bi_foreach_instr_global(ctx, I) {
if (I->nr_dests == 0 || I->dest[0].type != BI_INDEX_NORMAL)
continue;
unsigned idx = I->dest[0].value;
if (can_remat(I))
remat[I->dest[0].value] = I;
remat[idx] = I;
bi_foreach_ssa_dest(I, d) {
idx = I->dest[d].value;
assert(sizes[idx] == 0 && "SSA broken");
switch (I->op) {
case BI_OPCODE_PHI:
break;
default:
sizes[idx] = bi_count_write_registers(I, d);
break;
}
}
}
/* now that we know the rest of the sizes, find the sizes for PHI nodes */
bi_foreach_block(ctx, block) {
bi_foreach_phi_in_block(block, I) {
if (I->dest[0].type != BI_INDEX_NORMAL)
continue;
unsigned idx = I->dest[0].value;
sizes[idx] = 1;
bi_foreach_ssa_src(I, s) {
sizes[idx] = MAX2(sizes[idx], sizes[I->src[s].value]);
}
}
}
struct spill_block *blocks =
rzalloc_array(memctx, struct spill_block, ctx->num_blocks);
@ -1333,6 +1363,7 @@ bi_spill_ssa(bi_context *ctx, unsigned k, unsigned spill_base)
.k = k,
.W = W,
.S = S,
.size = sizes,
.spill_max = n,
.spill_base = spill_base,
.spill_map = spill_map,
@ -1359,6 +1390,7 @@ bi_spill_ssa(bi_context *ctx, unsigned k, unsigned spill_base)
.k = k,
.W = W,
.S = S,
.size = sizes,
.spill_max = n,
.spill_base = spill_base,
.spill_map = spill_map,