diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 149f60221e0..b051c7a196f 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -101,9 +101,12 @@ nir_def *ir3_load_driver_ubo_indirect(nir_builder *b, unsigned components, unsigned base, nir_def *offset, unsigned range); -bool ir3_def_is_rematerializable_for_preamble(nir_def *def); +bool ir3_def_is_rematerializable_for_preamble(nir_def *def, + nir_def **preamble_defs); -nir_def *ir3_rematerialize_def_for_preamble(nir_builder *b, nir_def *def); +nir_def *ir3_rematerialize_def_for_preamble(nir_builder *b, nir_def *def, + struct set *instr_set, + nir_def **preamble_defs); struct driver_param_info { uint32_t offset; diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c index e296d35192d..92d801398c6 100644 --- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c +++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c @@ -383,7 +383,8 @@ rematerialize_load_global_bases(nir_shader *nir, continue; range->ubo.global_base = - ir3_rematerialize_def_for_preamble(b, range->ubo.global_base); + ir3_rematerialize_def_for_preamble(b, range->ubo.global_base, NULL, + NULL); } return true; @@ -404,7 +405,8 @@ copy_global_to_uniform(nir_shader *nir, struct ir3_ubo_analysis_state *state) assert(range->ubo.global); nir_def *base = - ir3_rematerialize_def_for_preamble(b, range->ubo.global_base); + ir3_rematerialize_def_for_preamble(b, range->ubo.global_base, NULL, + NULL); unsigned start = range->start; if (start > (1 << 10)) { /* This is happening pretty late, so we need to add the offset @@ -587,7 +589,7 @@ ir3_nir_lower_const_global_loads(nir_shader *nir, struct ir3_shader_variant *v) nir_foreach_block (block, function->impl) { nir_foreach_instr (instr, block) { if (instr_is_load_const(instr) && - ir3_def_is_rematerializable_for_preamble(nir_instr_as_intrinsic(instr)->src[0].ssa)) + ir3_def_is_rematerializable_for_preamble(nir_instr_as_intrinsic(instr)->src[0].ssa, NULL)) gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr), &state, compiler->const_upload_unit, &upload_remaining); diff --git a/src/freedreno/ir3/ir3_nir_opt_preamble.c b/src/freedreno/ir3/ir3_nir_opt_preamble.c index 468b55f659a..8ef74683003 100644 --- a/src/freedreno/ir3/ir3_nir_opt_preamble.c +++ b/src/freedreno/ir3/ir3_nir_opt_preamble.c @@ -23,6 +23,7 @@ #include "ir3_compiler.h" #include "ir3_nir.h" +#include "nir_instr_set.h" /* Preamble optimization happens in two parts: first we generate the preamble * using the generic NIR pass, then we setup the preamble sequence and inline @@ -340,7 +341,8 @@ ir3_nir_opt_preamble(nir_shader *nir, struct ir3_shader_variant *v) * opt_preamble. Currently we only handle a few uncomplicated intrinsics. */ bool -ir3_def_is_rematerializable_for_preamble(nir_def *def) +ir3_def_is_rematerializable_for_preamble(nir_def *def, + nir_def **preamble_defs) { switch (def->parent_instr->type) { case nir_instr_type_load_const: @@ -349,10 +351,18 @@ ir3_def_is_rematerializable_for_preamble(nir_def *def) nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(def->parent_instr); switch (intrin->intrinsic) { case nir_intrinsic_load_ubo: - return ir3_def_is_rematerializable_for_preamble(intrin->src[0].ssa) && - ir3_def_is_rematerializable_for_preamble(intrin->src[1].ssa); + return ir3_def_is_rematerializable_for_preamble(intrin->src[0].ssa, + preamble_defs) && + ir3_def_is_rematerializable_for_preamble(intrin->src[1].ssa, + preamble_defs) && + (def->parent_instr->block->cf_node.parent->type == + nir_cf_node_function || + (nir_intrinsic_access(intrin) & ACCESS_CAN_SPECULATE)); case nir_intrinsic_bindless_resource_ir3: - return ir3_def_is_rematerializable_for_preamble(intrin->src[0].ssa); + return ir3_def_is_rematerializable_for_preamble(intrin->src[0].ssa, + preamble_defs); + case nir_intrinsic_load_preamble: + return !!preamble_defs; default: return false; } @@ -360,7 +370,8 @@ ir3_def_is_rematerializable_for_preamble(nir_def *def) case nir_instr_type_alu: { nir_alu_instr *alu = nir_instr_as_alu(def->parent_instr); for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { - if (!ir3_def_is_rematerializable_for_preamble(alu->src[i].src.ssa)) + if (!ir3_def_is_rematerializable_for_preamble(alu->src[i].src.ssa, + preamble_defs)) return false; } return true; @@ -372,6 +383,7 @@ ir3_def_is_rematerializable_for_preamble(nir_def *def) static nir_def * _rematerialize_def(nir_builder *b, struct hash_table *remap_ht, + struct set *instr_set, nir_def **preamble_defs, nir_def *def) { if (_mesa_hash_table_search(remap_ht, def->parent_instr)) @@ -382,15 +394,23 @@ _rematerialize_def(nir_builder *b, struct hash_table *remap_ht, break; case nir_instr_type_intrinsic: { nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(def->parent_instr); - for (unsigned i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; - i++) - _rematerialize_def(b, remap_ht, intrin->src[i].ssa); + if (intrin->intrinsic == nir_intrinsic_load_preamble) { + _mesa_hash_table_insert(remap_ht, def, + preamble_defs[nir_intrinsic_base(intrin)]); + return preamble_defs[nir_intrinsic_base(intrin)]; + } else { + for (unsigned i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; + i++) + _rematerialize_def(b, remap_ht, instr_set, preamble_defs, + intrin->src[i].ssa); + } break; } case nir_instr_type_alu: { nir_alu_instr *alu = nir_instr_as_alu(def->parent_instr); for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) - _rematerialize_def(b, remap_ht, alu->src[i].src.ssa); + _rematerialize_def(b, remap_ht, instr_set, preamble_defs, + alu->src[i].src.ssa); break; } default: @@ -399,16 +419,38 @@ _rematerialize_def(nir_builder *b, struct hash_table *remap_ht, nir_instr *instr = nir_instr_clone_deep(b->shader, def->parent_instr, remap_ht); - nir_builder_instr_insert(b, instr); + if (instr_set) { + nir_instr *other_instr = + nir_instr_set_add_or_rewrite(instr_set, instr, NULL); + if (other_instr) { + instr = other_instr; + _mesa_hash_table_insert(remap_ht, def, nir_instr_def(other_instr)); + } else { + nir_builder_instr_insert(b, instr); + } + } else { + nir_builder_instr_insert(b, instr); + } + return nir_instr_def(instr); } +/* Hoist a given definition into the preamble. If "instr_set" is non-NULL, + * de-duplicate the hoisted definitions, and if "preamble_defs" is non-NULL then + * it is used to remap load_preamble instructions back to the original + * definition in the preamble, if the definition uses load_preamble + * instructions. + */ + nir_def * -ir3_rematerialize_def_for_preamble(nir_builder *b, nir_def *def) +ir3_rematerialize_def_for_preamble(nir_builder *b, nir_def *def, + struct set *instr_set, + nir_def **preamble_defs) { struct hash_table *remap_ht = _mesa_pointer_hash_table_create(NULL); - nir_def *new_def = _rematerialize_def(b, remap_ht, def); + nir_def *new_def = + _rematerialize_def(b, remap_ht, instr_set, preamble_defs, def); _mesa_hash_table_destroy(remap_ht, NULL);