agx: add bindless texture promotion support
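
Teach the preamble pass to promote bindless texture handles into
texture state registers. Handles created by bindless_image_agx with a
descriptor set index below 32 (an encoding restriction) get their own
preamble class, nir_preamble_class_image. Handles that are instead
stored to ordinary uniforms are stripped back to their heap offsets,
since bindless handles cannot live in uniform registers, and
rematerialized in the main shader. Drivers opt in via
agx_shader_key::promote_textures and read the number of texture state
registers pushed by the preamble from
agx_shader_info::texture_state_count.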

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35949>
Alyssa Rosenzweig 2025-07-04 18:07:54 -04:00
parent 83ad08feff
commit ffe262ee11
4 changed files with 133 additions and 15 deletions

View file

@@ -3042,7 +3042,8 @@ optimize_bounds(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 }
 
 static void
-agx_optimize_nir(nir_shader *nir, bool soft_fault, uint16_t *preamble_size)
+agx_optimize_nir(nir_shader *nir, bool soft_fault, uint16_t *preamble_size,
+                 uint8_t *ts_count)
 {
    /* This runs only once up front since other optimizations don't affect it */
    NIR_PASS(_, nir, nir_opt_shrink_stores, true);
@@ -3147,8 +3148,12 @@ agx_optimize_nir(nir_shader *nir, bool soft_fault, uint16_t *preamble_size)
    if (preamble_size && (!(agx_compiler_debug & AGX_DBG_NOPREAMBLE))) {
       unsigned temp = *preamble_size;
+      unsigned temp_ts_count = ts_count ? *ts_count : 1000 /* large finite */;
 
-      NIR_PASS(_, nir, agx_nir_opt_preamble, &temp);
+      NIR_PASS(_, nir, agx_nir_opt_preamble, &temp, &temp_ts_count);
       *preamble_size = temp;
+
+      if (ts_count)
+         *ts_count = temp_ts_count;
    }
 
    /* Forming preambles may dramatically reduce the instruction count
@@ -3895,8 +3900,10 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
                 nir_metadata_control_flow, NULL);
 
    info->push_count = key->reserved_preamble;
-   agx_optimize_nir(nir, key->dev.soft_fault,
-                    key->secondary ? NULL : &info->push_count);
+   agx_optimize_nir(
+      nir, key->dev.soft_fault, key->secondary ? NULL : &info->push_count,
+      (key->secondary || !key->promote_textures) ? NULL
+                                                 : &info->texture_state_count);
 
    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
       info->varyings.fs.nr_cf = key->fs.cf_base;
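
A note on the "1000 /* large finite */" seed above: the size arguments
here are in/out — agx_optimize_nir seeds each class with the space already
reserved (the general class starts at key->reserved_preamble) and reads
back the total used. Seeding the image class with a value far past its
32-unit storage budget therefore keeps texture promotion switched off
whenever the caller passes a NULL ts_count, with no separate code path.
A standalone sketch of that convention (illustrative values, not compiler
code):

   #include <stdio.h>

   int main(void)
   {
      /* Mimics the per-class in/out sizes: a class whose seed already
       * exceeds its storage budget can never accept a promotion. */
      unsigned sizes[2] = {8, 1000};         /* seeds: general, image */
      const unsigned storage[2] = {480, 32}; /* budgets, as in preamble_options */

      for (int c = 0; c < 2; c++)
         printf("class %d: %s\n", c,
                sizes[c] < storage[c] ? "promotable" : "disabled");

      return 0;
   }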

View file

@@ -140,6 +140,9 @@ struct agx_shader_info {
    /* Uses txf and hence needs a txf sampler mapped */
    bool uses_txf;
 
+   /* Number of texture state registers pushed by the preamble. */
+   uint8_t texture_state_count;
+
    /* Number of 16-bit registers used by the main shader and preamble
     * respectively.
     */
@@ -283,6 +286,12 @@ struct agx_shader_key {
     */
    bool promote_constants;
 
+   /* Similarly whether the driver supports promoting bindless textures.
+    * Currently this works only if non-bindless textures are not used, but
+    * none of our drivers mix bindless / non-bindless usage.
+    */
+   bool promote_textures;
+
    /* Set if this is a non-monolithic shader that must be linked with additional
    * shader parts before the program can be used. This suppresses omission of
    * `stop` instructions, which the linker must insert instead.
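
For context on the new flag: a driver that satisfies the no-mixing
restriction would opt in along these lines (a hypothetical sketch; the
helper and its surroundings are illustrative, not part of this commit):

   #include "agx_compile.h" /* assumed home of struct agx_shader_key */

   static void
   example_fill_key(struct agx_shader_key *key)
   {
      key->promote_constants = true;

      /* Legal only because this hypothetical driver never mixes bindless
       * and non-bindless texture access within a single shader. */
      key->promote_textures = true;
   }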

View file

@@ -1080,7 +1080,8 @@ void agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies,
 void agx_compute_liveness(agx_context *ctx);
 void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);
 
-bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size);
+bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size,
+                          unsigned *ts_count);
 bool agx_nir_lower_load_mask(nir_shader *shader);
 bool agx_nir_lower_ubo(nir_shader *shader);
 bool agx_nir_lower_shared_bitsize(nir_shader *shader);

View file

@@ -8,9 +8,20 @@
 #include "util/macros.h"
 #include "agx_compiler.h"
 #include "nir.h"
 #include "nir_intrinsics.h"
 #include "nir_opcodes.h"
 
+static bool
+is_promotable_texture_handle(nir_def *def)
+{
+   nir_instr *instr = def->parent_instr;
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   return intr->intrinsic == nir_intrinsic_bindless_image_agx &&
+          nir_intrinsic_desc_set(intr) < 32 /* encoding restriction */;
+}
+
 static void
 def_size(nir_def *def, unsigned *size, unsigned *align,
          nir_preamble_class *class)
@@ -19,7 +30,8 @@ def_size(nir_def *def, unsigned *size, unsigned *align,
    *size = (bit_size * def->num_components) / 16;
    *align = bit_size / 16;
-   *class = nir_preamble_class_general;
+   *class = is_promotable_texture_handle(def) ? nir_preamble_class_image
+                                              : nir_preamble_class_general;
 }
 
 static bool
@@ -217,6 +229,7 @@ instr_cost(nir_instr *instr, const void *data)
    case nir_intrinsic_load_global_constant:
    case nir_intrinsic_load_constant_agx:
    case nir_intrinsic_load_ubo:
+   case nir_intrinsic_bindless_image_agx:
       return 10.0;
 
    case nir_intrinsic_ddx:
    case nir_intrinsic_ddx_fine:
@@ -280,11 +293,7 @@ rewrite_cost(nir_def *def, const void *data)
 static bool
 avoid_instr(const nir_instr *instr, const void *data)
 {
-   if (instr->type != nir_instr_type_intrinsic)
-      return false;
-
-   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-   return intr->intrinsic == nir_intrinsic_bindless_image_agx;
+   return false;
 }
 
 static const nir_opt_preamble_options preamble_options = {
@@ -302,10 +311,102 @@ static const nir_opt_preamble_options preamble_options = {
     * 480 seems to be a sweetspot, based on a few minutes of shader-db.
     */
    .preamble_storage_size[nir_preamble_class_general] = 480,
+
+   /* We have at least 32 texture state registers. TODO: check for more? */
+   .preamble_storage_size[nir_preamble_class_image] = 32,
 };
 
-bool
-agx_nir_opt_preamble(nir_shader *nir, unsigned *preamble_size)
+/*
+ * Bindless image handles can't be stored to uniforms, so we move them back to
+ * the main shader. Effectively un-optimizing the preamble.
+ */
+static bool
+lower_store_preamble(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 {
-   return nir_opt_preamble(nir, &preamble_options, preamble_size);
+   int16_t *heaps = data;
+   if (intr->intrinsic != nir_intrinsic_store_preamble ||
+       nir_intrinsic_preamble_class(intr) == nir_preamble_class_image)
+      return false;
+
+   nir_intrinsic_instr *handle = nir_src_as_intrinsic(intr->src[0]);
+   if (!handle || handle->intrinsic != nir_intrinsic_bindless_image_agx)
+      return false;
+
+   heaps[nir_intrinsic_base(intr)] = nir_intrinsic_desc_set(handle);
+   nir_src_rewrite(&intr->src[0], handle->src[0].ssa);
+   return true;
+}
+
+static bool
+lower_preamble(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   if (intr->intrinsic != nir_intrinsic_load_preamble)
+      return false;
+
+   int16_t *heaps = data;
+   b->cursor = nir_after_instr(&intr->instr);
+
+   unsigned base = nir_intrinsic_base(intr);
+   nir_def *new_ = NULL;
+   bool ts = nir_intrinsic_preamble_class(intr) == nir_preamble_class_image;
+
+   if (!ts && heaps[base] >= 0) {
+      new_ = nir_bindless_image_agx(b, &intr->def, .desc_set = heaps[base]);
+   }
+
+   nir_foreach_use_safe(use, &intr->def) {
+      nir_instr *parent = nir_src_parent_instr(use);
+
+      if (parent->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *pintr = nir_instr_as_intrinsic(parent);
+
+         if (ts) {
+            nir_rewrite_image_intrinsic(pintr, nir_imm_intN_t(b, base / 2, 16),
+                                        false);
+         } else if (new_ != NULL &&
+                    pintr->intrinsic != nir_intrinsic_bindless_image_agx) {
+            nir_src_rewrite(use, new_);
+         }
+      } else if (parent->type == nir_instr_type_tex) {
+         nir_tex_instr *tex = nir_instr_as_tex(parent);
+         nir_tex_src *src = (nir_tex_src *)use;
+
+         if (src->src_type != nir_tex_src_texture_handle)
+            continue;
+
+         if (ts) {
+            nir_steal_tex_src(tex, nir_tex_src_texture_handle);
+            tex->texture_index = base / 2;
+         } else {
+            assert(new_ != NULL);
+            nir_src_rewrite(use, new_);
+         }
+      }
+   }
+
+   return true;
+}
+
+bool
+agx_nir_opt_preamble(nir_shader *nir, unsigned *preamble_size,
+                     unsigned *ts_count)
+{
+   bool progress = false;
+   unsigned sizes[] = {*preamble_size, *ts_count};
+
+   NIR_PASS(progress, nir, nir_opt_preamble, &preamble_options, sizes);
+   *preamble_size = sizes[0];
+   *ts_count = sizes[1];
+
+   if (progress) {
+      int16_t heap[512];
+      memset(heap, ~0, sizeof(heap));
+
+      nir_function_intrinsics_pass(nir_shader_get_preamble(nir),
+                                   lower_store_preamble,
+                                   nir_metadata_control_flow, heap);
+
+      NIR_PASS(progress, nir, nir_shader_intrinsics_pass, lower_preamble,
+               nir_metadata_control_flow, heap);
+   }
+
+   return progress;
 }
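
Two details in the lowering are worth spelling out. First, base / 2:
def_size measures defs in 16-bit units and a bindless handle here is a
single 32-bit value, so each promoted handle occupies two units and its
texture state index falls out as base / 2. Second, the desc_set
bookkeeping rides in a flat int16_t array memset to ~0, so untouched
entries read as -1, i.e. "this preamble slot does not hold a promoted
bindless handle". A minimal standalone illustration of that sentinel
pattern (hypothetical values):

   #include <stdint.h>
   #include <stdio.h>
   #include <string.h>

   int main(void)
   {
      int16_t heap[512];

      /* Every byte 0xff makes every int16_t entry -1, the "not a
       * promoted handle" sentinel used by the pass. */
      memset(heap, ~0, sizeof(heap));

      heap[6] = 3; /* e.g. preamble base 6 holds a handle from desc_set 3 */

      for (unsigned base = 0; base < 512; base++) {
         if (heap[base] >= 0)
            printf("base %u: rebuild handle from heap %d\n", base, heap[base]);
      }

      return 0;
   }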