From d55bdb4ec58a916f0e8e052f4076f3a57f51a707 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 30 Jun 2025 18:43:59 -0400 Subject: [PATCH] nir/opt_preamble: add "register class" concept Class represents an indexed "ideal" register class, where non-general classes only allow defs that choose that class in the def_size callback. nir_opt_preamble will try to assign specialized classes where possible, falling back to the general class once the special-purpose classes are exhausted. AGX will use this mechanism to promote bindless texture handles to bound texture registers where possible, falling back to pushing the handle as a uniform where not possible. Supporting multiple classes in nir_opt_preamble allows this multi-level hoisting to work in a single nir_opt_preamble call with proper global behaviour. Add this concept to nir_opt_preamble so we can use it in AGX later in this MR. Signed-off-by: Alyssa Rosenzweig Reviewed-by: Job Noorman Part-of: --- src/asahi/compiler/agx_nir_opt_preamble.c | 6 ++- src/compiler/nir/nir.h | 25 +++++++++++-- src/compiler/nir/nir_intrinsics.py | 8 +++- src/compiler/nir/nir_opt_preamble.c | 45 +++++++++++++++++------ src/compiler/nir/nir_print.c | 14 +++++++ src/freedreno/ir3/ir3_nir_opt_preamble.c | 6 ++- 6 files changed, 82 insertions(+), 22 deletions(-) diff --git a/src/asahi/compiler/agx_nir_opt_preamble.c b/src/asahi/compiler/agx_nir_opt_preamble.c index 1215d31803c..ee2c8243ca1 100644 --- a/src/asahi/compiler/agx_nir_opt_preamble.c +++ b/src/asahi/compiler/agx_nir_opt_preamble.c @@ -11,12 +11,14 @@ #include "nir_opcodes.h" static void -def_size(nir_def *def, unsigned *size, unsigned *align) +def_size(nir_def *def, unsigned *size, unsigned *align, + nir_preamble_class *class) { unsigned bit_size = MAX2(def->bit_size, 16); *size = (bit_size * def->num_components) / 16; *align = bit_size / 16; + *class = nir_preamble_class_general; } static bool @@ -330,7 +332,7 @@ static const nir_opt_preamble_options preamble_options = { * hot constants so we don't end up rematerializing all over the place. * 480 seems to be a sweetspot, based on a few minutes of shader-db. */ - .preamble_storage_size = 480, + .preamble_storage_size[nir_preamble_class_general] = 480, }; bool diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index b724832e357..0afa61aab86 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -224,6 +224,16 @@ typedef enum { nir_resource_intel_sampler_embedded = 1u << 4, } nir_resource_data_intel; +/** + * Register class for registers managed by nir_opt_preamble. General can handle + * anything, the others are driver-specific but with common names for nir_print. + */ +typedef enum { + nir_preamble_class_general, + nir_preamble_class_image, + nir_preamble_num_classes, +} nir_preamble_class; + /** * Which components to interpret as signed in cmat_muladd. * See 'Cooperative Matrix Operands' in SPV_KHR_cooperative_matrix. @@ -6343,13 +6353,20 @@ typedef struct nir_opt_preamble_options { /* True if load_workgroup_size is supported in the preamble. */ bool load_workgroup_size_allowed; - /* size/align for load/store_preamble. */ - void (*def_size)(nir_def *def, unsigned *size, unsigned *align); + /* size/align/class for load/store_preamble. + * + * Defs with class "general" will always be allocated as general. Other + * classes will attempt to allocate as the specialized class but may fallback + * to general. This mechanism enables "tiered" classes in a single + * nir_opt_preamble call with proper global behaviour. + */ + void (*def_size)(nir_def *def, unsigned *size, unsigned *align, + nir_preamble_class *class_); - /* Total available size for load/store_preamble storage, in units + /* Total available size per class for load/store_preamble storage, in units * determined by def_size. */ - unsigned preamble_storage_size; + unsigned preamble_storage_size[nir_preamble_num_classes]; /* Give the cost for an instruction. nir_opt_preamble will prioritize * instructions with higher costs. Instructions with cost 0 may still be diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 700dc166896..f5e24610318 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -347,6 +347,9 @@ index("bool", "explicit_coord") # The index of the format string used by a printf. (u_printf_info element of the shader) index("unsigned", "fmt_idx") +# Register class for load/store_preamble +index("nir_preamble_class", "preamble_class") + intrinsic("nop", flags=[CAN_ELIMINATE]) # Uses a value and cannot be eliminated. @@ -1338,8 +1341,9 @@ load("mesh_view_indices", [1], [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER]) # This should use something similar to Vulkan push constants and load_preamble # should be relatively cheap. # For now we only support accesses with a constant offset. -load("preamble", [], indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER]) -store("preamble", [], indices=[BASE]) +load("preamble", [], indices=[BASE, PREAMBLE_CLASS], + flags=[CAN_ELIMINATE, CAN_REORDER]) +store("preamble", [], indices=[BASE, PREAMBLE_CLASS]) # A 64-bit bitfield indexed by I/O location storing 1 in bits corresponding to # varyings that have the flat interpolation specifier in the fragment shader and diff --git a/src/compiler/nir/nir_opt_preamble.c b/src/compiler/nir/nir_opt_preamble.c index 20ef7aca06d..6ad2a5c1950 100644 --- a/src/compiler/nir/nir_opt_preamble.c +++ b/src/compiler/nir/nir_opt_preamble.c @@ -57,6 +57,7 @@ typedef struct { unsigned can_move_users; unsigned size, align; + nir_preamble_class class; unsigned offset; @@ -549,7 +550,8 @@ replace_for_block(nir_builder *b, opt_preamble_ctx *ctx, if (state->replace) { nir_def *clone_def = nir_instr_def(clone); - nir_store_preamble(b, clone_def, .base = state->offset); + nir_store_preamble(b, clone_def, .base = state->offset, + .preamble_class = state->class); } } } @@ -823,6 +825,7 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options, def_state **candidates = malloc(sizeof(*candidates) * num_candidates); unsigned candidate_idx = 0; unsigned total_size = 0; + bool multiple_classes = false; /* Step 3: Calculate value of candidates by propagating downwards. We try * to share the value amongst can_move uses, in case there are multiple. @@ -866,10 +869,12 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options, options->rewrite_cost_cb(def, options->cb_data); if (state->benefit > 0) { - options->def_size(def, &state->size, &state->align); + options->def_size(def, &state->size, &state->align, + &state->class); total_size = ALIGN_POT(total_size, state->align); total_size += state->size; candidates[candidate_idx++] = state; + multiple_classes |= (state->class != nir_preamble_class_general); } } } @@ -890,27 +895,42 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options, * alignment. We use a well-known greedy approximation, sorting by value * divided by size. */ + if (multiple_classes || + (((*size) + total_size) > options->preamble_storage_size[0])) { - if (((*size) + total_size) > options->preamble_storage_size) { qsort(candidates, num_candidates, sizeof(*candidates), candidate_sort); } - unsigned offset = *size; for (unsigned i = 0; i < num_candidates; i++) { def_state *state = candidates[i]; - offset = ALIGN_POT(offset, state->align); + nir_preamble_class c = state->class; + size[c] = ALIGN_POT(size[c], state->align); - if (offset + state->size > options->preamble_storage_size) - break; + assert(c < ARRAY_SIZE(options->preamble_storage_size)); + + if (size[c] + state->size > options->preamble_storage_size[c]) { + /* If there's only a single class and it's full, early-exit. If we have + * multiple classes, we do not early-exit as one class filling up does + * not necessarily mean the others are. This could be optimized but + * it doesn't really matter. + */ + if (!multiple_classes) + break; + + /* Try falling back on on the default class */ + state->class = nir_preamble_class_general; + c = state->class; + size[c] = ALIGN_POT(size[c], state->align); + if (size[c] + state->size > options->preamble_storage_size[c]) + continue; + } state->replace = true; - state->offset = offset; + state->offset = size[c]; - offset += state->size; + size[c] += state->size; } - *size = offset; - free(candidates); /* Determine which if's need to be reconstructed, based on the replacements @@ -959,7 +979,8 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options, nir_def *new_def = nir_load_preamble(b, def->num_components, def->bit_size, - .base = state->offset); + .base = state->offset, + .preamble_class = state->class); nir_def_rewrite_uses(def, new_def); nir_instr_free_and_dce(instr); diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 00b9d8c8a29..15eb7094354 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -1232,6 +1232,12 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) for (unsigned i = 0; i < info->num_indices; i++) { unsigned idx = info->indices[i]; + + /* Skip "general" to denoise since it is the unremarkable default case */ + if (idx == NIR_INTRINSIC_PREAMBLE_CLASS && + nir_intrinsic_preamble_class(instr) == nir_preamble_class_general) + continue; + if (i == 0) fprintf(fp, " ("); else @@ -1700,6 +1706,14 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) glsl_interp_mode_name(nir_intrinsic_interp_mode(instr))); break; + case NIR_INTRINSIC_PREAMBLE_CLASS: { + /* "General" handled above */ + nir_preamble_class cls = nir_intrinsic_preamble_class(instr); + assert(cls == nir_preamble_class_image); + fprintf(fp, "class=image"); + break; + } + default: { unsigned off = info->index_map[idx] - 1; fprintf(fp, "%s=%d", nir_intrinsic_index_names[idx], instr->const_index[off]); diff --git a/src/freedreno/ir3/ir3_nir_opt_preamble.c b/src/freedreno/ir3/ir3_nir_opt_preamble.c index 73ff82d9221..64ae33846cf 100644 --- a/src/freedreno/ir3/ir3_nir_opt_preamble.c +++ b/src/freedreno/ir3/ir3_nir_opt_preamble.c @@ -16,7 +16,8 @@ */ static void -def_size(nir_def *def, unsigned *size, unsigned *align) +def_size(nir_def *def, unsigned *size, unsigned *align, + nir_preamble_class *class) { unsigned bit_size = def->bit_size == 1 ? 32 : def->bit_size; /* Due to the implicit const file promotion we want to expand 16-bit values @@ -25,6 +26,7 @@ def_size(nir_def *def, unsigned *size, unsigned *align) */ *size = DIV_ROUND_UP(bit_size, 32) * def->num_components; *align = 1; + *class = nir_preamble_class_general; } static bool @@ -319,7 +321,7 @@ ir3_nir_opt_preamble(nir_shader *nir, struct ir3_shader_variant *v) .subgroup_size_uniform = true, .load_workgroup_size_allowed = true, .def_size = def_size, - .preamble_storage_size = max_size, + .preamble_storage_size[nir_preamble_class_general] = max_size, .instr_cost_cb = instr_cost, .avoid_instr_cb = avoid_instr, .rewrite_cost_cb = rewrite_cost,