ir3: Expand preamble rematerialization

Add the ability to deduplicate hoisted expressions, which will be
necessary to avoid repeatedly hoisting the same descriptors and blowing
our budget. The offset calculation may have itself been hoisted into the
preamble, so we also have to be able to hoist a bindless_resource_ir3
referencing a load_preamble and connect it to the source of the
corresponding store_preamble.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29873>
This commit is contained in:
Connor Abbott 2024-06-24 07:30:52 -04:00 committed by Marge Bot
parent 59940d6577
commit fdfe86aa52
3 changed files with 64 additions and 17 deletions

View file

@ -101,9 +101,12 @@ nir_def *ir3_load_driver_ubo_indirect(nir_builder *b, unsigned components,
unsigned base, nir_def *offset,
unsigned range);
bool ir3_def_is_rematerializable_for_preamble(nir_def *def);
bool ir3_def_is_rematerializable_for_preamble(nir_def *def,
nir_def **preamble_defs);
nir_def *ir3_rematerialize_def_for_preamble(nir_builder *b, nir_def *def);
nir_def *ir3_rematerialize_def_for_preamble(nir_builder *b, nir_def *def,
struct set *instr_set,
nir_def **preamble_defs);
struct driver_param_info {
uint32_t offset;

View file

@ -383,7 +383,8 @@ rematerialize_load_global_bases(nir_shader *nir,
continue;
range->ubo.global_base =
ir3_rematerialize_def_for_preamble(b, range->ubo.global_base);
ir3_rematerialize_def_for_preamble(b, range->ubo.global_base, NULL,
NULL);
}
return true;
@ -404,7 +405,8 @@ copy_global_to_uniform(nir_shader *nir, struct ir3_ubo_analysis_state *state)
assert(range->ubo.global);
nir_def *base =
ir3_rematerialize_def_for_preamble(b, range->ubo.global_base);
ir3_rematerialize_def_for_preamble(b, range->ubo.global_base, NULL,
NULL);
unsigned start = range->start;
if (start > (1 << 10)) {
/* This is happening pretty late, so we need to add the offset
@ -587,7 +589,7 @@ ir3_nir_lower_const_global_loads(nir_shader *nir, struct ir3_shader_variant *v)
nir_foreach_block (block, function->impl) {
nir_foreach_instr (instr, block) {
if (instr_is_load_const(instr) &&
ir3_def_is_rematerializable_for_preamble(nir_instr_as_intrinsic(instr)->src[0].ssa))
ir3_def_is_rematerializable_for_preamble(nir_instr_as_intrinsic(instr)->src[0].ssa, NULL))
gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr), &state,
compiler->const_upload_unit,
&upload_remaining);

View file

@ -23,6 +23,7 @@
#include "ir3_compiler.h"
#include "ir3_nir.h"
#include "nir_instr_set.h"
/* Preamble optimization happens in two parts: first we generate the preamble
* using the generic NIR pass, then we setup the preamble sequence and inline
@ -340,7 +341,8 @@ ir3_nir_opt_preamble(nir_shader *nir, struct ir3_shader_variant *v)
* opt_preamble. Currently we only handle a few uncomplicated intrinsics.
*/
bool
ir3_def_is_rematerializable_for_preamble(nir_def *def)
ir3_def_is_rematerializable_for_preamble(nir_def *def,
nir_def **preamble_defs)
{
switch (def->parent_instr->type) {
case nir_instr_type_load_const:
@ -349,10 +351,18 @@ ir3_def_is_rematerializable_for_preamble(nir_def *def)
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(def->parent_instr);
switch (intrin->intrinsic) {
case nir_intrinsic_load_ubo:
return ir3_def_is_rematerializable_for_preamble(intrin->src[0].ssa) &&
ir3_def_is_rematerializable_for_preamble(intrin->src[1].ssa);
return ir3_def_is_rematerializable_for_preamble(intrin->src[0].ssa,
preamble_defs) &&
ir3_def_is_rematerializable_for_preamble(intrin->src[1].ssa,
preamble_defs) &&
(def->parent_instr->block->cf_node.parent->type ==
nir_cf_node_function ||
(nir_intrinsic_access(intrin) & ACCESS_CAN_SPECULATE));
case nir_intrinsic_bindless_resource_ir3:
return ir3_def_is_rematerializable_for_preamble(intrin->src[0].ssa);
return ir3_def_is_rematerializable_for_preamble(intrin->src[0].ssa,
preamble_defs);
case nir_intrinsic_load_preamble:
return !!preamble_defs;
default:
return false;
}
@ -360,7 +370,8 @@ ir3_def_is_rematerializable_for_preamble(nir_def *def)
case nir_instr_type_alu: {
nir_alu_instr *alu = nir_instr_as_alu(def->parent_instr);
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
if (!ir3_def_is_rematerializable_for_preamble(alu->src[i].src.ssa))
if (!ir3_def_is_rematerializable_for_preamble(alu->src[i].src.ssa,
preamble_defs))
return false;
}
return true;
@ -372,6 +383,7 @@ ir3_def_is_rematerializable_for_preamble(nir_def *def)
static nir_def *
_rematerialize_def(nir_builder *b, struct hash_table *remap_ht,
struct set *instr_set, nir_def **preamble_defs,
nir_def *def)
{
if (_mesa_hash_table_search(remap_ht, def->parent_instr))
@ -382,15 +394,23 @@ _rematerialize_def(nir_builder *b, struct hash_table *remap_ht,
break;
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(def->parent_instr);
for (unsigned i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_srcs;
i++)
_rematerialize_def(b, remap_ht, intrin->src[i].ssa);
if (intrin->intrinsic == nir_intrinsic_load_preamble) {
_mesa_hash_table_insert(remap_ht, def,
preamble_defs[nir_intrinsic_base(intrin)]);
return preamble_defs[nir_intrinsic_base(intrin)];
} else {
for (unsigned i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_srcs;
i++)
_rematerialize_def(b, remap_ht, instr_set, preamble_defs,
intrin->src[i].ssa);
}
break;
}
case nir_instr_type_alu: {
nir_alu_instr *alu = nir_instr_as_alu(def->parent_instr);
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
_rematerialize_def(b, remap_ht, alu->src[i].src.ssa);
_rematerialize_def(b, remap_ht, instr_set, preamble_defs,
alu->src[i].src.ssa);
break;
}
default:
@ -399,16 +419,38 @@ _rematerialize_def(nir_builder *b, struct hash_table *remap_ht,
nir_instr *instr = nir_instr_clone_deep(b->shader, def->parent_instr,
remap_ht);
nir_builder_instr_insert(b, instr);
if (instr_set) {
nir_instr *other_instr =
nir_instr_set_add_or_rewrite(instr_set, instr, NULL);
if (other_instr) {
instr = other_instr;
_mesa_hash_table_insert(remap_ht, def, nir_instr_def(other_instr));
} else {
nir_builder_instr_insert(b, instr);
}
} else {
nir_builder_instr_insert(b, instr);
}
return nir_instr_def(instr);
}
/* Hoist a given definition into the preamble. If "instr_set" is non-NULL,
* de-duplicate the hoisted definitions, and if "preamble_defs" is non-NULL then
* it is used to remap load_preamble instructions back to the original
* definition in the preamble, if the definition uses load_preamble
* instructions.
*/
nir_def *
ir3_rematerialize_def_for_preamble(nir_builder *b, nir_def *def)
ir3_rematerialize_def_for_preamble(nir_builder *b, nir_def *def,
struct set *instr_set,
nir_def **preamble_defs)
{
struct hash_table *remap_ht = _mesa_pointer_hash_table_create(NULL);
nir_def *new_def = _rematerialize_def(b, remap_ht, def);
nir_def *new_def =
_rematerialize_def(b, remap_ht, instr_set, preamble_defs, def);
_mesa_hash_table_destroy(remap_ht, NULL);