diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index eb9805176bb..32dad47ff14 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -3042,7 +3042,8 @@ optimize_bounds(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 }
 
 static void
-agx_optimize_nir(nir_shader *nir, bool soft_fault, uint16_t *preamble_size)
+agx_optimize_nir(nir_shader *nir, bool soft_fault, uint16_t *preamble_size,
+                 uint8_t *ts_count)
 {
    /* This runs only once up front since other optimizations don't affect it */
    NIR_PASS(_, nir, nir_opt_shrink_stores, true);
@@ -3147,8 +3148,12 @@ agx_optimize_nir(nir_shader *nir, bool soft_fault, uint16_t *preamble_size)
 
    if (preamble_size && (!(agx_compiler_debug & AGX_DBG_NOPREAMBLE))) {
       unsigned temp = *preamble_size;
-      NIR_PASS(_, nir, agx_nir_opt_preamble, &temp);
+      unsigned temp_ts_count = ts_count ? *ts_count : 1000 /* large finite */;
+      NIR_PASS(_, nir, agx_nir_opt_preamble, &temp, &temp_ts_count);
       *preamble_size = temp;
+
+      if (ts_count)
+         *ts_count = temp_ts_count;
    }
 
    /* Forming preambles may dramatically reduce the instruction count
@@ -3895,8 +3900,10 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
             nir_metadata_control_flow, NULL);
 
    info->push_count = key->reserved_preamble;
-   agx_optimize_nir(nir, key->dev.soft_fault,
-                    key->secondary ? NULL : &info->push_count);
+   agx_optimize_nir(
+      nir, key->dev.soft_fault, key->secondary ? NULL : &info->push_count,
+      (key->secondary || !key->promote_textures) ? NULL
+                                                 : &info->texture_state_count);
 
    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
       info->varyings.fs.nr_cf = key->fs.cf_base;
diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h
index 103eb78f81f..13635c7abe3 100644
--- a/src/asahi/compiler/agx_compile.h
+++ b/src/asahi/compiler/agx_compile.h
@@ -140,6 +140,9 @@ struct agx_shader_info {
    /* Uses txf and hence needs a txf sampler mapped */
    bool uses_txf;
 
+   /* Number of texture state registers pushed by the preamble. */
+   uint8_t texture_state_count;
+
    /* Number of 16-bit registers used by the main shader and preamble
    * respectively.
    */
@@ -283,6 +286,12 @@ struct agx_shader_key {
    */
   bool promote_constants;
 
+   /* Similarly, whether the driver supports promoting bindless textures.
+    * Currently this works only if non-bindless textures are not used, but
+    * none of our drivers mix bindless / non-bindless usage.
+    */
+   bool promote_textures;
+
   /* Set if this is a non-monolithic shader that must be linked with additional
    * shader parts before the program can be used. This suppresses omission of
    * `stop` instructions, which the linker must insert instead.
diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h
index a0fefa9d8de..0c78895252c 100644
--- a/src/asahi/compiler/agx_compiler.h
+++ b/src/asahi/compiler/agx_compiler.h
@@ -1080,7 +1080,8 @@ void agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies,
 
 void agx_compute_liveness(agx_context *ctx);
 void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);
-bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size);
+bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size,
+                          unsigned *ts_count);
 bool agx_nir_lower_load_mask(nir_shader *shader);
 bool agx_nir_lower_ubo(nir_shader *shader);
 bool agx_nir_lower_shared_bitsize(nir_shader *shader);
diff --git a/src/asahi/compiler/agx_nir_opt_preamble.c b/src/asahi/compiler/agx_nir_opt_preamble.c
index 2fbb97e689d..7fc70a62063 100644
--- a/src/asahi/compiler/agx_nir_opt_preamble.c
+++ b/src/asahi/compiler/agx_nir_opt_preamble.c
@@ -8,9 +8,20 @@
 #include "util/macros.h"
 #include "agx_compiler.h"
 #include "nir.h"
-#include "nir_intrinsics.h"
 #include "nir_opcodes.h"
 
+static bool
+is_promotable_texture_handle(nir_def *def)
+{
+   nir_instr *instr = def->parent_instr;
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   return intr->intrinsic == nir_intrinsic_bindless_image_agx &&
+          nir_intrinsic_desc_set(intr) < 32 /* encoding restriction */;
+}
+
 static void
 def_size(nir_def *def, unsigned *size, unsigned *align,
          nir_preamble_class *class)
@@ -19,7 +30,8 @@ def_size(nir_def *def, unsigned *size, unsigned *align,
 
    *size = (bit_size * def->num_components) / 16;
    *align = bit_size / 16;
-   *class = nir_preamble_class_general;
+   *class = is_promotable_texture_handle(def) ? nir_preamble_class_image
+                                              : nir_preamble_class_general;
 }
 
 static bool
@@ -217,6 +229,7 @@ instr_cost(nir_instr *instr, const void *data)
    case nir_intrinsic_load_global_constant:
    case nir_intrinsic_load_constant_agx:
    case nir_intrinsic_load_ubo:
+   case nir_intrinsic_bindless_image_agx:
       return 10.0;
    case nir_intrinsic_ddx:
    case nir_intrinsic_ddx_fine:
@@ -280,11 +293,7 @@ rewrite_cost(nir_def *def, const void *data)
 static bool
 avoid_instr(const nir_instr *instr, const void *data)
 {
-   if (instr->type != nir_instr_type_intrinsic)
-      return false;
-
-   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-   return intr->intrinsic == nir_intrinsic_bindless_image_agx;
+   return false;
 }
 
 static const nir_opt_preamble_options preamble_options = {
@@ -302,10 +311,102 @@ static const nir_opt_preamble_options preamble_options = {
    * 480 seems to be a sweetspot, based on a few minutes of shader-db.
    */
   .preamble_storage_size[nir_preamble_class_general] = 480,
+
+   /* We have at least 32 texture state registers. TODO: check for more? */
+   .preamble_storage_size[nir_preamble_class_image] = 32,
 };
 
-bool
-agx_nir_opt_preamble(nir_shader *nir, unsigned *preamble_size)
+/*
+ * Bindless image handles can't be stored to uniforms, so we move them back to
+ * the main shader. Effectively un-optimizing the preamble.
+ */
+static bool
+lower_store_preamble(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 {
-   return nir_opt_preamble(nir, &preamble_options, preamble_size);
+   int16_t *heaps = data;
+   if (intr->intrinsic != nir_intrinsic_store_preamble ||
+       nir_intrinsic_preamble_class(intr) == nir_preamble_class_image)
+      return false;
+
+   nir_intrinsic_instr *handle = nir_src_as_intrinsic(intr->src[0]);
+   if (!handle || handle->intrinsic != nir_intrinsic_bindless_image_agx)
+      return false;
+
+   heaps[nir_intrinsic_base(intr)] = nir_intrinsic_desc_set(handle);
+   nir_src_rewrite(&intr->src[0], handle->src[0].ssa);
+   return true;
+}
+
+static bool
+lower_preamble(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   if (intr->intrinsic != nir_intrinsic_load_preamble)
+      return false;
+
+   int16_t *heaps = data;
+   b->cursor = nir_after_instr(&intr->instr);
+
+   unsigned base = nir_intrinsic_base(intr);
+   nir_def *new_ = NULL;
+   bool ts = nir_intrinsic_preamble_class(intr) == nir_preamble_class_image;
+   if (!ts && heaps[base] >= 0) {
+      new_ = nir_bindless_image_agx(b, &intr->def, .desc_set = heaps[base]);
+   }
+
+   nir_foreach_use_safe(use, &intr->def) {
+      nir_instr *parent = nir_src_parent_instr(use);
+
+      if (parent->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *pintr = nir_instr_as_intrinsic(parent);
+
+         if (ts) {
+            nir_rewrite_image_intrinsic(pintr, nir_imm_intN_t(b, base / 2, 16),
+                                        false);
+         } else if (new_ != NULL &&
+                    pintr->intrinsic != nir_intrinsic_bindless_image_agx) {
+            nir_src_rewrite(use, new_);
+         }
+      } else if (parent->type == nir_instr_type_tex) {
+         nir_tex_instr *tex = nir_instr_as_tex(parent);
+         nir_tex_src *src = (nir_tex_src *)use;
+         if (src->src_type != nir_tex_src_texture_handle)
+            continue;
+
+         if (ts) {
+            nir_steal_tex_src(tex, nir_tex_src_texture_handle);
+            tex->texture_index = base / 2;
+         } else {
+            assert(new_ != NULL);
+            nir_src_rewrite(use, new_);
+         }
+      }
+   }
+
+   return true;
+}
+
+bool
+agx_nir_opt_preamble(nir_shader *nir, unsigned *preamble_size,
+                     unsigned *ts_count)
+{
+   bool progress = false;
+
+   unsigned sizes[] = {*preamble_size, *ts_count};
+   NIR_PASS(progress, nir, nir_opt_preamble, &preamble_options, sizes);
+   *preamble_size = sizes[0];
+   *ts_count = sizes[1];
+
+   if (progress) {
+      int16_t heap[512];
+      memset(heap, ~0, sizeof(heap));
+
+      nir_function_intrinsics_pass(nir_shader_get_preamble(nir),
+                                   lower_store_preamble,
+                                   nir_metadata_control_flow, heap);
+
+      NIR_PASS(progress, nir, nir_shader_intrinsics_pass, lower_preamble,
+               nir_metadata_control_flow, heap);
+   }
+
+   return progress;
 }
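
Note on the `1000 /* large finite */` fallback in agx_optimize_nir: nir_opt_preamble treats each per-class size as in/out state, where the input is the storage already reserved for that class and the output is the new total, bounded by preamble_storage_size. Starting the image class at a value past its 32-unit budget therefore leaves it no room to allocate, which is how texture promotion is disabled when the driver does not set promote_textures. The standalone toy model below is only a sketch of that allocation contract (toy_alloc is a made-up helper, not NIR's actual allocator), showing why a large finite start value behaves like a disable switch while still being safe to write back:

#include <assert.h>
#include <stdio.h>

/*
 * Toy model of the per-class in/out size contract the patch relies on:
 * "*used" comes in as the storage already reserved and goes out as the new
 * total, capped by the class budget.  Returns the allocated base, or -1 if
 * the class has no room.  Sketch only; not NIR's real allocator.
 */
static int
toy_alloc(unsigned *used, unsigned budget, unsigned want)
{
   if (*used + want > budget)
      return -1;

   int base = (int)*used;
   *used += want;
   return base;
}

int
main(void)
{
   /* promote_textures set: the image class starts at 0 with a 32-unit
    * budget, and each 32-bit handle takes 2 units, hence texture index =
    * base / 2 in lower_preamble.
    */
   unsigned ts = 0;
   assert(toy_alloc(&ts, 32, 2) == 0); /* texture state 0 */
   assert(toy_alloc(&ts, 32, 2) == 2); /* texture state 1 */

   /* promote_textures not set: start at 1000, a "large finite" value past
    * the budget, so every image-class allocation fails and nothing is
    * promoted, yet the value can still be written back harmlessly.
    */
   unsigned off = 1000;
   assert(toy_alloc(&off, 32, 2) == -1);

   printf("used %u of 32 image-class units\n", ts);
   return 0;
}

The same contract is why ts_count can stay optional at the agx_compile.c call sites: a NULL pointer simply maps to a start offset that can never fit, and the result is only copied back when the driver asked for it.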