mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 02:58:05 +02:00
nir/opt_preamble: add "register class" concept
Class represents an indexed "ideal" register class, where non-general classes only allow defs that choose that class in the def_size callback. nir_opt_preamble will try to assign specialized classes where possible, falling back to the general class once the special-purpose classes are exhausted. AGX will use this mechanism to promote bindless texture handles to bound texture registers where possible, falling back to pushing the handle as a uniform where not possible. Supporting multiple classes in nir_opt_preamble allows this multi-level hoisting to work in a single nir_opt_preamble call with proper global behaviour. Add this concept to nir_opt_preamble so we can use it in AGX later in this MR. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Reviewed-by: Job Noorman <job@noorman.info> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35949>
This commit is contained in:
parent
89403487b1
commit
d55bdb4ec5
6 changed files with 82 additions and 22 deletions
|
|
@ -11,12 +11,14 @@
|
|||
#include "nir_opcodes.h"
|
||||
|
||||
static void
|
||||
def_size(nir_def *def, unsigned *size, unsigned *align)
|
||||
def_size(nir_def *def, unsigned *size, unsigned *align,
|
||||
nir_preamble_class *class)
|
||||
{
|
||||
unsigned bit_size = MAX2(def->bit_size, 16);
|
||||
|
||||
*size = (bit_size * def->num_components) / 16;
|
||||
*align = bit_size / 16;
|
||||
*class = nir_preamble_class_general;
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -330,7 +332,7 @@ static const nir_opt_preamble_options preamble_options = {
|
|||
* hot constants so we don't end up rematerializing all over the place.
|
||||
* 480 seems to be a sweetspot, based on a few minutes of shader-db.
|
||||
*/
|
||||
.preamble_storage_size = 480,
|
||||
.preamble_storage_size[nir_preamble_class_general] = 480,
|
||||
};
|
||||
|
||||
bool
|
||||
|
|
|
|||
|
|
@ -224,6 +224,16 @@ typedef enum {
|
|||
nir_resource_intel_sampler_embedded = 1u << 4,
|
||||
} nir_resource_data_intel;
|
||||
|
||||
/**
|
||||
* Register class for registers managed by nir_opt_preamble. General can handle
|
||||
* anything, the others are driver-specific but with common names for nir_print.
|
||||
*/
|
||||
typedef enum {
|
||||
nir_preamble_class_general,
|
||||
nir_preamble_class_image,
|
||||
nir_preamble_num_classes,
|
||||
} nir_preamble_class;
|
||||
|
||||
/**
|
||||
* Which components to interpret as signed in cmat_muladd.
|
||||
* See 'Cooperative Matrix Operands' in SPV_KHR_cooperative_matrix.
|
||||
|
|
@ -6343,13 +6353,20 @@ typedef struct nir_opt_preamble_options {
|
|||
/* True if load_workgroup_size is supported in the preamble. */
|
||||
bool load_workgroup_size_allowed;
|
||||
|
||||
/* size/align for load/store_preamble. */
|
||||
void (*def_size)(nir_def *def, unsigned *size, unsigned *align);
|
||||
/* size/align/class for load/store_preamble.
|
||||
*
|
||||
* Defs with class "general" will always be allocated as general. Other
|
||||
* classes will attempt to allocate as the specialized class but may fallback
|
||||
* to general. This mechanism enables "tiered" classes in a single
|
||||
* nir_opt_preamble call with proper global behaviour.
|
||||
*/
|
||||
void (*def_size)(nir_def *def, unsigned *size, unsigned *align,
|
||||
nir_preamble_class *class_);
|
||||
|
||||
/* Total available size for load/store_preamble storage, in units
|
||||
/* Total available size per class for load/store_preamble storage, in units
|
||||
* determined by def_size.
|
||||
*/
|
||||
unsigned preamble_storage_size;
|
||||
unsigned preamble_storage_size[nir_preamble_num_classes];
|
||||
|
||||
/* Give the cost for an instruction. nir_opt_preamble will prioritize
|
||||
* instructions with higher costs. Instructions with cost 0 may still be
|
||||
|
|
|
|||
|
|
@ -347,6 +347,9 @@ index("bool", "explicit_coord")
|
|||
# The index of the format string used by a printf. (u_printf_info element of the shader)
|
||||
index("unsigned", "fmt_idx")
|
||||
|
||||
# Register class for load/store_preamble
|
||||
index("nir_preamble_class", "preamble_class")
|
||||
|
||||
intrinsic("nop", flags=[CAN_ELIMINATE])
|
||||
|
||||
# Uses a value and cannot be eliminated.
|
||||
|
|
@ -1338,8 +1341,9 @@ load("mesh_view_indices", [1], [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
|
|||
# This should use something similar to Vulkan push constants and load_preamble
|
||||
# should be relatively cheap.
|
||||
# For now we only support accesses with a constant offset.
|
||||
load("preamble", [], indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
store("preamble", [], indices=[BASE])
|
||||
load("preamble", [], indices=[BASE, PREAMBLE_CLASS],
|
||||
flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
store("preamble", [], indices=[BASE, PREAMBLE_CLASS])
|
||||
|
||||
# A 64-bit bitfield indexed by I/O location storing 1 in bits corresponding to
|
||||
# varyings that have the flat interpolation specifier in the fragment shader and
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ typedef struct {
|
|||
unsigned can_move_users;
|
||||
|
||||
unsigned size, align;
|
||||
nir_preamble_class class;
|
||||
|
||||
unsigned offset;
|
||||
|
||||
|
|
@ -549,7 +550,8 @@ replace_for_block(nir_builder *b, opt_preamble_ctx *ctx,
|
|||
|
||||
if (state->replace) {
|
||||
nir_def *clone_def = nir_instr_def(clone);
|
||||
nir_store_preamble(b, clone_def, .base = state->offset);
|
||||
nir_store_preamble(b, clone_def, .base = state->offset,
|
||||
.preamble_class = state->class);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -823,6 +825,7 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
|
|||
def_state **candidates = malloc(sizeof(*candidates) * num_candidates);
|
||||
unsigned candidate_idx = 0;
|
||||
unsigned total_size = 0;
|
||||
bool multiple_classes = false;
|
||||
|
||||
/* Step 3: Calculate value of candidates by propagating downwards. We try
|
||||
* to share the value amongst can_move uses, in case there are multiple.
|
||||
|
|
@ -866,10 +869,12 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
|
|||
options->rewrite_cost_cb(def, options->cb_data);
|
||||
|
||||
if (state->benefit > 0) {
|
||||
options->def_size(def, &state->size, &state->align);
|
||||
options->def_size(def, &state->size, &state->align,
|
||||
&state->class);
|
||||
total_size = ALIGN_POT(total_size, state->align);
|
||||
total_size += state->size;
|
||||
candidates[candidate_idx++] = state;
|
||||
multiple_classes |= (state->class != nir_preamble_class_general);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -890,27 +895,42 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
|
|||
* alignment. We use a well-known greedy approximation, sorting by value
|
||||
* divided by size.
|
||||
*/
|
||||
if (multiple_classes ||
|
||||
(((*size) + total_size) > options->preamble_storage_size[0])) {
|
||||
|
||||
if (((*size) + total_size) > options->preamble_storage_size) {
|
||||
qsort(candidates, num_candidates, sizeof(*candidates), candidate_sort);
|
||||
}
|
||||
|
||||
unsigned offset = *size;
|
||||
for (unsigned i = 0; i < num_candidates; i++) {
|
||||
def_state *state = candidates[i];
|
||||
offset = ALIGN_POT(offset, state->align);
|
||||
nir_preamble_class c = state->class;
|
||||
size[c] = ALIGN_POT(size[c], state->align);
|
||||
|
||||
if (offset + state->size > options->preamble_storage_size)
|
||||
break;
|
||||
assert(c < ARRAY_SIZE(options->preamble_storage_size));
|
||||
|
||||
if (size[c] + state->size > options->preamble_storage_size[c]) {
|
||||
/* If there's only a single class and it's full, early-exit. If we have
|
||||
* multiple classes, we do not early-exit as one class filling up does
|
||||
* not necessarily mean the others are. This could be optimized but
|
||||
* it doesn't really matter.
|
||||
*/
|
||||
if (!multiple_classes)
|
||||
break;
|
||||
|
||||
/* Try falling back on on the default class */
|
||||
state->class = nir_preamble_class_general;
|
||||
c = state->class;
|
||||
size[c] = ALIGN_POT(size[c], state->align);
|
||||
if (size[c] + state->size > options->preamble_storage_size[c])
|
||||
continue;
|
||||
}
|
||||
|
||||
state->replace = true;
|
||||
state->offset = offset;
|
||||
state->offset = size[c];
|
||||
|
||||
offset += state->size;
|
||||
size[c] += state->size;
|
||||
}
|
||||
|
||||
*size = offset;
|
||||
|
||||
free(candidates);
|
||||
|
||||
/* Determine which if's need to be reconstructed, based on the replacements
|
||||
|
|
@ -959,7 +979,8 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
|
|||
|
||||
nir_def *new_def =
|
||||
nir_load_preamble(b, def->num_components, def->bit_size,
|
||||
.base = state->offset);
|
||||
.base = state->offset,
|
||||
.preamble_class = state->class);
|
||||
|
||||
nir_def_rewrite_uses(def, new_def);
|
||||
nir_instr_free_and_dce(instr);
|
||||
|
|
|
|||
|
|
@ -1232,6 +1232,12 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
|
|||
|
||||
for (unsigned i = 0; i < info->num_indices; i++) {
|
||||
unsigned idx = info->indices[i];
|
||||
|
||||
/* Skip "general" to denoise since it is the unremarkable default case */
|
||||
if (idx == NIR_INTRINSIC_PREAMBLE_CLASS &&
|
||||
nir_intrinsic_preamble_class(instr) == nir_preamble_class_general)
|
||||
continue;
|
||||
|
||||
if (i == 0)
|
||||
fprintf(fp, " (");
|
||||
else
|
||||
|
|
@ -1700,6 +1706,14 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
|
|||
glsl_interp_mode_name(nir_intrinsic_interp_mode(instr)));
|
||||
break;
|
||||
|
||||
case NIR_INTRINSIC_PREAMBLE_CLASS: {
|
||||
/* "General" handled above */
|
||||
nir_preamble_class cls = nir_intrinsic_preamble_class(instr);
|
||||
assert(cls == nir_preamble_class_image);
|
||||
fprintf(fp, "class=image");
|
||||
break;
|
||||
}
|
||||
|
||||
default: {
|
||||
unsigned off = info->index_map[idx] - 1;
|
||||
fprintf(fp, "%s=%d", nir_intrinsic_index_names[idx], instr->const_index[off]);
|
||||
|
|
|
|||
|
|
@ -16,7 +16,8 @@
|
|||
*/
|
||||
|
||||
static void
|
||||
def_size(nir_def *def, unsigned *size, unsigned *align)
|
||||
def_size(nir_def *def, unsigned *size, unsigned *align,
|
||||
nir_preamble_class *class)
|
||||
{
|
||||
unsigned bit_size = def->bit_size == 1 ? 32 : def->bit_size;
|
||||
/* Due to the implicit const file promotion we want to expand 16-bit values
|
||||
|
|
@ -25,6 +26,7 @@ def_size(nir_def *def, unsigned *size, unsigned *align)
|
|||
*/
|
||||
*size = DIV_ROUND_UP(bit_size, 32) * def->num_components;
|
||||
*align = 1;
|
||||
*class = nir_preamble_class_general;
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -319,7 +321,7 @@ ir3_nir_opt_preamble(nir_shader *nir, struct ir3_shader_variant *v)
|
|||
.subgroup_size_uniform = true,
|
||||
.load_workgroup_size_allowed = true,
|
||||
.def_size = def_size,
|
||||
.preamble_storage_size = max_size,
|
||||
.preamble_storage_size[nir_preamble_class_general] = max_size,
|
||||
.instr_cost_cb = instr_cost,
|
||||
.avoid_instr_cb = avoid_instr,
|
||||
.rewrite_cost_cb = rewrite_cost,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue