nir/opt_preamble: add "register class" concept

Class represents an indexed "ideal" register class, where non-general classes
only allow defs that choose that class in the def_size callback.
nir_opt_preamble will try to assign specialized classes where possible, falling
back to the general class once the special-purpose classes are exhausted.

AGX will use this mechanism to promote bindless texture handles to bound texture
registers where possible, falling back to pushing the handle as a uniform where
not possible. Supporting multiple classes in nir_opt_preamble allows this
multi-level hoisting to work in a single nir_opt_preamble call with proper
global behaviour.

Add this concept to nir_opt_preamble so we can use it in AGX later in this MR.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Job Noorman <job@noorman.info>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35949>
This commit is contained in:
Alyssa Rosenzweig 2025-06-30 18:43:59 -04:00
parent 89403487b1
commit d55bdb4ec5
6 changed files with 82 additions and 22 deletions

View file

@ -11,12 +11,14 @@
#include "nir_opcodes.h"
static void
def_size(nir_def *def, unsigned *size, unsigned *align)
def_size(nir_def *def, unsigned *size, unsigned *align,
nir_preamble_class *class)
{
unsigned bit_size = MAX2(def->bit_size, 16);
*size = (bit_size * def->num_components) / 16;
*align = bit_size / 16;
*class = nir_preamble_class_general;
}
static bool
@ -330,7 +332,7 @@ static const nir_opt_preamble_options preamble_options = {
* hot constants so we don't end up rematerializing all over the place.
* 480 seems to be a sweetspot, based on a few minutes of shader-db.
*/
.preamble_storage_size = 480,
.preamble_storage_size[nir_preamble_class_general] = 480,
};
bool

View file

@ -224,6 +224,16 @@ typedef enum {
nir_resource_intel_sampler_embedded = 1u << 4,
} nir_resource_data_intel;
/**
* Register class for registers managed by nir_opt_preamble. General can handle
* anything, the others are driver-specific but with common names for nir_print.
*/
typedef enum {
nir_preamble_class_general,
nir_preamble_class_image,
nir_preamble_num_classes,
} nir_preamble_class;
/**
* Which components to interpret as signed in cmat_muladd.
* See 'Cooperative Matrix Operands' in SPV_KHR_cooperative_matrix.
@ -6343,13 +6353,20 @@ typedef struct nir_opt_preamble_options {
/* True if load_workgroup_size is supported in the preamble. */
bool load_workgroup_size_allowed;
/* size/align for load/store_preamble. */
void (*def_size)(nir_def *def, unsigned *size, unsigned *align);
/* size/align/class for load/store_preamble.
*
* Defs with class "general" will always be allocated as general. Other
* classes will attempt to allocate as the specialized class but may fallback
* to general. This mechanism enables "tiered" classes in a single
* nir_opt_preamble call with proper global behaviour.
*/
void (*def_size)(nir_def *def, unsigned *size, unsigned *align,
nir_preamble_class *class_);
/* Total available size for load/store_preamble storage, in units
/* Total available size per class for load/store_preamble storage, in units
* determined by def_size.
*/
unsigned preamble_storage_size;
unsigned preamble_storage_size[nir_preamble_num_classes];
/* Give the cost for an instruction. nir_opt_preamble will prioritize
* instructions with higher costs. Instructions with cost 0 may still be

View file

@ -347,6 +347,9 @@ index("bool", "explicit_coord")
# The index of the format string used by a printf. (u_printf_info element of the shader)
index("unsigned", "fmt_idx")
# Register class for load/store_preamble
index("nir_preamble_class", "preamble_class")
intrinsic("nop", flags=[CAN_ELIMINATE])
# Uses a value and cannot be eliminated.
@ -1338,8 +1341,9 @@ load("mesh_view_indices", [1], [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
# This should use something similar to Vulkan push constants and load_preamble
# should be relatively cheap.
# For now we only support accesses with a constant offset.
load("preamble", [], indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
store("preamble", [], indices=[BASE])
load("preamble", [], indices=[BASE, PREAMBLE_CLASS],
flags=[CAN_ELIMINATE, CAN_REORDER])
store("preamble", [], indices=[BASE, PREAMBLE_CLASS])
# A 64-bit bitfield indexed by I/O location storing 1 in bits corresponding to
# varyings that have the flat interpolation specifier in the fragment shader and

View file

@ -57,6 +57,7 @@ typedef struct {
unsigned can_move_users;
unsigned size, align;
nir_preamble_class class;
unsigned offset;
@ -549,7 +550,8 @@ replace_for_block(nir_builder *b, opt_preamble_ctx *ctx,
if (state->replace) {
nir_def *clone_def = nir_instr_def(clone);
nir_store_preamble(b, clone_def, .base = state->offset);
nir_store_preamble(b, clone_def, .base = state->offset,
.preamble_class = state->class);
}
}
}
@ -823,6 +825,7 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
def_state **candidates = malloc(sizeof(*candidates) * num_candidates);
unsigned candidate_idx = 0;
unsigned total_size = 0;
bool multiple_classes = false;
/* Step 3: Calculate value of candidates by propagating downwards. We try
* to share the value amongst can_move uses, in case there are multiple.
@ -866,10 +869,12 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
options->rewrite_cost_cb(def, options->cb_data);
if (state->benefit > 0) {
options->def_size(def, &state->size, &state->align);
options->def_size(def, &state->size, &state->align,
&state->class);
total_size = ALIGN_POT(total_size, state->align);
total_size += state->size;
candidates[candidate_idx++] = state;
multiple_classes |= (state->class != nir_preamble_class_general);
}
}
}
@ -890,27 +895,42 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
* alignment. We use a well-known greedy approximation, sorting by value
* divided by size.
*/
if (multiple_classes ||
(((*size) + total_size) > options->preamble_storage_size[0])) {
if (((*size) + total_size) > options->preamble_storage_size) {
qsort(candidates, num_candidates, sizeof(*candidates), candidate_sort);
}
unsigned offset = *size;
for (unsigned i = 0; i < num_candidates; i++) {
def_state *state = candidates[i];
offset = ALIGN_POT(offset, state->align);
nir_preamble_class c = state->class;
size[c] = ALIGN_POT(size[c], state->align);
if (offset + state->size > options->preamble_storage_size)
break;
assert(c < ARRAY_SIZE(options->preamble_storage_size));
if (size[c] + state->size > options->preamble_storage_size[c]) {
/* If there's only a single class and it's full, early-exit. If we have
* multiple classes, we do not early-exit as one class filling up does
* not necessarily mean the others are full. This could be optimized but
* it doesn't really matter.
*/
if (!multiple_classes)
break;
/* Try falling back on the default class */
state->class = nir_preamble_class_general;
c = state->class;
size[c] = ALIGN_POT(size[c], state->align);
if (size[c] + state->size > options->preamble_storage_size[c])
continue;
}
state->replace = true;
state->offset = offset;
state->offset = size[c];
offset += state->size;
size[c] += state->size;
}
*size = offset;
free(candidates);
/* Determine which if's need to be reconstructed, based on the replacements
@ -959,7 +979,8 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
nir_def *new_def =
nir_load_preamble(b, def->num_components, def->bit_size,
.base = state->offset);
.base = state->offset,
.preamble_class = state->class);
nir_def_rewrite_uses(def, new_def);
nir_instr_free_and_dce(instr);

View file

@ -1232,6 +1232,12 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
for (unsigned i = 0; i < info->num_indices; i++) {
unsigned idx = info->indices[i];
/* Skip "general" to denoise since it is the unremarkable default case */
if (idx == NIR_INTRINSIC_PREAMBLE_CLASS &&
nir_intrinsic_preamble_class(instr) == nir_preamble_class_general)
continue;
if (i == 0)
fprintf(fp, " (");
else
@ -1700,6 +1706,14 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
glsl_interp_mode_name(nir_intrinsic_interp_mode(instr)));
break;
case NIR_INTRINSIC_PREAMBLE_CLASS: {
/* "General" handled above */
nir_preamble_class cls = nir_intrinsic_preamble_class(instr);
assert(cls == nir_preamble_class_image);
fprintf(fp, "class=image");
break;
}
default: {
unsigned off = info->index_map[idx] - 1;
fprintf(fp, "%s=%d", nir_intrinsic_index_names[idx], instr->const_index[off]);

View file

@ -16,7 +16,8 @@
*/
static void
def_size(nir_def *def, unsigned *size, unsigned *align)
def_size(nir_def *def, unsigned *size, unsigned *align,
nir_preamble_class *class)
{
unsigned bit_size = def->bit_size == 1 ? 32 : def->bit_size;
/* Due to the implicit const file promotion we want to expand 16-bit values
@ -25,6 +26,7 @@ def_size(nir_def *def, unsigned *size, unsigned *align)
*/
*size = DIV_ROUND_UP(bit_size, 32) * def->num_components;
*align = 1;
*class = nir_preamble_class_general;
}
static bool
@ -319,7 +321,7 @@ ir3_nir_opt_preamble(nir_shader *nir, struct ir3_shader_variant *v)
.subgroup_size_uniform = true,
.load_workgroup_size_allowed = true,
.def_size = def_size,
.preamble_storage_size = max_size,
.preamble_storage_size[nir_preamble_class_general] = max_size,
.instr_cost_cb = instr_cost,
.avoid_instr_cb = avoid_instr,
.rewrite_cost_cb = rewrite_cost,