nir/opt_preamble: add "register class" concept

Class represents an indexed "ideal" register class, where non-general classes
only allow defs that choose that class in the def_size callback.
nir_opt_preamble will try to assign specialized classes where possible, falling
back to the general class once the special-purpose classes are exhausted.

AGX will use this mechanism to promote bindless texture handles to bound texture
registers where possible, falling back to pushing the handle as a uniform where
not possible. Supporting multiple classes in nir_opt_preamble allows this
multi-level hoisting to work in a single nir_opt_preamble call with proper
global behaviour.

Add this concept to nir_opt_preamble so we can use it in AGX later in this MR.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Job Noorman <job@noorman.info>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35949>
This commit is contained in:
Alyssa Rosenzweig 2025-06-30 18:43:59 -04:00
parent 89403487b1
commit d55bdb4ec5
6 changed files with 82 additions and 22 deletions

View file

@ -11,12 +11,14 @@
#include "nir_opcodes.h"
static void
def_size(nir_def *def, unsigned *size, unsigned *align)
def_size(nir_def *def, unsigned *size, unsigned *align,
nir_preamble_class *class)
{
unsigned bit_size = MAX2(def->bit_size, 16);
*size = (bit_size * def->num_components) / 16;
*align = bit_size / 16;
*class = nir_preamble_class_general;
}
static bool
@ -330,7 +332,7 @@ static const nir_opt_preamble_options preamble_options = {
* hot constants so we don't end up rematerializing all over the place.
* 480 seems to be a sweetspot, based on a few minutes of shader-db.
*/
.preamble_storage_size = 480,
.preamble_storage_size[nir_preamble_class_general] = 480,
};
bool

View file

@ -224,6 +224,16 @@ typedef enum {
nir_resource_intel_sampler_embedded = 1u << 4,
} nir_resource_data_intel;
/**
* Register class for registers managed by nir_opt_preamble. General can handle
* anything, the others are driver-specific but with common names for nir_print.
*/
typedef enum {
nir_preamble_class_general,
nir_preamble_class_image,
nir_preamble_num_classes,
} nir_preamble_class;
/**
* Which components to interpret as signed in cmat_muladd.
* See 'Cooperative Matrix Operands' in SPV_KHR_cooperative_matrix.
@ -6343,13 +6353,20 @@ typedef struct nir_opt_preamble_options {
/* True if load_workgroup_size is supported in the preamble. */
bool load_workgroup_size_allowed;
/* size/align for load/store_preamble. */
void (*def_size)(nir_def *def, unsigned *size, unsigned *align);
/* size/align/class for load/store_preamble.
*
* Defs with class "general" will always be allocated as general. Other
* classes will attempt to allocate as the specialized class but may fallback
* to general. This mechanism enables "tiered" classes in a single
* nir_opt_preamble call with proper global behaviour.
*/
void (*def_size)(nir_def *def, unsigned *size, unsigned *align,
nir_preamble_class *class_);
/* Total available size for load/store_preamble storage, in units
/* Total available size per class for load/store_preamble storage, in units
* determined by def_size.
*/
unsigned preamble_storage_size;
unsigned preamble_storage_size[nir_preamble_num_classes];
/* Give the cost for an instruction. nir_opt_preamble will prioritize
* instructions with higher costs. Instructions with cost 0 may still be

View file

@ -347,6 +347,9 @@ index("bool", "explicit_coord")
# The index of the format string used by a printf. (u_printf_info element of the shader)
index("unsigned", "fmt_idx")
# Register class for load/store_preamble
index("nir_preamble_class", "preamble_class")
intrinsic("nop", flags=[CAN_ELIMINATE])
# Uses a value and cannot be eliminated.
@ -1338,8 +1341,9 @@ load("mesh_view_indices", [1], [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
# This should use something similar to Vulkan push constants and load_preamble
# should be relatively cheap.
# For now we only support accesses with a constant offset.
load("preamble", [], indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
store("preamble", [], indices=[BASE])
load("preamble", [], indices=[BASE, PREAMBLE_CLASS],
flags=[CAN_ELIMINATE, CAN_REORDER])
store("preamble", [], indices=[BASE, PREAMBLE_CLASS])
# A 64-bit bitfield indexed by I/O location storing 1 in bits corresponding to
# varyings that have the flat interpolation specifier in the fragment shader and

View file

@ -57,6 +57,7 @@ typedef struct {
unsigned can_move_users;
unsigned size, align;
nir_preamble_class class;
unsigned offset;
@ -549,7 +550,8 @@ replace_for_block(nir_builder *b, opt_preamble_ctx *ctx,
if (state->replace) {
nir_def *clone_def = nir_instr_def(clone);
nir_store_preamble(b, clone_def, .base = state->offset);
nir_store_preamble(b, clone_def, .base = state->offset,
.preamble_class = state->class);
}
}
}
@ -823,6 +825,7 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
def_state **candidates = malloc(sizeof(*candidates) * num_candidates);
unsigned candidate_idx = 0;
unsigned total_size = 0;
bool multiple_classes = false;
/* Step 3: Calculate value of candidates by propagating downwards. We try
* to share the value amongst can_move uses, in case there are multiple.
@ -866,10 +869,12 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
options->rewrite_cost_cb(def, options->cb_data);
if (state->benefit > 0) {
options->def_size(def, &state->size, &state->align);
options->def_size(def, &state->size, &state->align,
&state->class);
total_size = ALIGN_POT(total_size, state->align);
total_size += state->size;
candidates[candidate_idx++] = state;
multiple_classes |= (state->class != nir_preamble_class_general);
}
}
}
@ -890,27 +895,42 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
* alignment. We use a well-known greedy approximation, sorting by value
* divided by size.
*/
if (multiple_classes ||
(((*size) + total_size) > options->preamble_storage_size[0])) {
if (((*size) + total_size) > options->preamble_storage_size) {
qsort(candidates, num_candidates, sizeof(*candidates), candidate_sort);
}
unsigned offset = *size;
for (unsigned i = 0; i < num_candidates; i++) {
def_state *state = candidates[i];
offset = ALIGN_POT(offset, state->align);
nir_preamble_class c = state->class;
size[c] = ALIGN_POT(size[c], state->align);
if (offset + state->size > options->preamble_storage_size)
break;
assert(c < ARRAY_SIZE(options->preamble_storage_size));
if (size[c] + state->size > options->preamble_storage_size[c]) {
/* If there's only a single class and it's full, early-exit. If we have
* multiple classes, we do not early-exit as one class filling up does
* not necessarily mean the others are full. This could be optimized but
* it doesn't really matter.
*/
if (!multiple_classes)
break;
/* Try falling back on the default class */
state->class = nir_preamble_class_general;
c = state->class;
size[c] = ALIGN_POT(size[c], state->align);
if (size[c] + state->size > options->preamble_storage_size[c])
continue;
}
state->replace = true;
state->offset = offset;
state->offset = size[c];
offset += state->size;
size[c] += state->size;
}
*size = offset;
free(candidates);
/* Determine which if's need to be reconstructed, based on the replacements
@ -959,7 +979,8 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
nir_def *new_def =
nir_load_preamble(b, def->num_components, def->bit_size,
.base = state->offset);
.base = state->offset,
.preamble_class = state->class);
nir_def_rewrite_uses(def, new_def);
nir_instr_free_and_dce(instr);

View file

@ -1232,6 +1232,12 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
for (unsigned i = 0; i < info->num_indices; i++) {
unsigned idx = info->indices[i];
/* Skip "general" to denoise since it is the unremarkable default case */
if (idx == NIR_INTRINSIC_PREAMBLE_CLASS &&
nir_intrinsic_preamble_class(instr) == nir_preamble_class_general)
continue;
if (i == 0)
fprintf(fp, " (");
else
@ -1700,6 +1706,14 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
glsl_interp_mode_name(nir_intrinsic_interp_mode(instr)));
break;
case NIR_INTRINSIC_PREAMBLE_CLASS: {
/* "General" handled above */
nir_preamble_class cls = nir_intrinsic_preamble_class(instr);
assert(cls == nir_preamble_class_image);
fprintf(fp, "class=image");
break;
}
default: {
unsigned off = info->index_map[idx] - 1;
fprintf(fp, "%s=%d", nir_intrinsic_index_names[idx], instr->const_index[off]);

View file

@ -16,7 +16,8 @@
*/
static void
def_size(nir_def *def, unsigned *size, unsigned *align)
def_size(nir_def *def, unsigned *size, unsigned *align,
nir_preamble_class *class)
{
unsigned bit_size = def->bit_size == 1 ? 32 : def->bit_size;
/* Due to the implicit const file promotion we want to expand 16-bit values
@ -25,6 +26,7 @@ def_size(nir_def *def, unsigned *size, unsigned *align)
*/
*size = DIV_ROUND_UP(bit_size, 32) * def->num_components;
*align = 1;
*class = nir_preamble_class_general;
}
static bool
@ -319,7 +321,7 @@ ir3_nir_opt_preamble(nir_shader *nir, struct ir3_shader_variant *v)
.subgroup_size_uniform = true,
.load_workgroup_size_allowed = true,
.def_size = def_size,
.preamble_storage_size = max_size,
.preamble_storage_size[nir_preamble_class_general] = max_size,
.instr_cost_cb = instr_cost,
.avoid_instr_cb = avoid_instr,
.rewrite_cost_cb = rewrite_cost,