diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 12eb373b045..2bcaa274fac 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -2272,6 +2272,7 @@ bool ir3_merge_rpt(struct ir3 *ir, struct ir3_shader_variant *v); bool ir3_opt_predicates(struct ir3 *ir, struct ir3_shader_variant *v); bool ir3_create_alias_tex_regs(struct ir3 *ir); bool ir3_insert_alias_tex(struct ir3 *ir); +bool ir3_create_alias_rt(struct ir3 *ir, struct ir3_shader_variant *v); /* unreachable block elimination: */ bool ir3_remove_unreachable(struct ir3 *ir); diff --git a/src/freedreno/ir3/ir3_alias.c b/src/freedreno/ir3/ir3_alias.c index b0aeea9daa8..406bd53c64d 100644 --- a/src/freedreno/ir3/ir3_alias.c +++ b/src/freedreno/ir3/ir3_alias.c @@ -637,3 +637,126 @@ ir3_insert_alias_tex(struct ir3 *ir) return progress; } + +static struct ir3_instruction * +get_or_create_shpe(struct ir3 *ir) +{ + struct ir3_instruction *shpe = ir3_find_shpe(ir); + + if (!shpe) { + shpe = ir3_create_empty_preamble(ir); + assert(shpe); + } + + return shpe; +} + +static bool +create_output_aliases(struct ir3_shader_variant *v, struct ir3_instruction *end) +{ + bool progress = false; + struct ir3_instruction *shpe = NULL; + + foreach_src_n (src, src_n, end) { + struct ir3_shader_output *output = &v->outputs[end->end.outidxs[src_n]]; + + if (output->slot < FRAG_RESULT_DATA0 || + output->slot > FRAG_RESULT_DATA7) { + continue; + } + + assert(src->flags & IR3_REG_SSA); + struct ir3_instruction *src_instr = src->def->instr; + + if (src_instr->opc != OPC_META_COLLECT && src_instr->opc != OPC_MOV) { + continue; + } + + unsigned rt = output->slot - FRAG_RESULT_DATA0; + + foreach_src_n (comp_src, comp, src_instr) { + if (!(comp_src->flags & (IR3_REG_IMMED | IR3_REG_CONST))) { + /* Only const and immediate values can be aliased. */ + continue; + } + + if ((comp_src->flags & IR3_REG_HALF) && + (comp_src->flags & IR3_REG_CONST)) { + /* alias.rt doesn't seem to work with half const. + * TODO figure out what's going wrong here. Might just be + * unsupported because the blob only uses it in one CTS test. + */ + continue; + } + + if (!shpe) { + shpe = get_or_create_shpe(v->ir); + } + + struct ir3_instruction *alias = + ir3_instr_create_at(ir3_before_instr(shpe), OPC_ALIAS, 1, 2); + alias->cat7.alias_scope = ALIAS_RT; + ir3_dst_create(alias, regid(rt, comp), IR3_REG_RT); + + unsigned src_flags = + comp_src->flags & (IR3_REG_HALF | IR3_REG_CONST | IR3_REG_IMMED); + ir3_src_create(alias, comp_src->num, src_flags)->uim_val = + comp_src->uim_val; + + if (src_instr->opc == OPC_MOV) { + /* The float type bit seems entirely optional (i.e., it only affects + * disassembly) but since we have this info for movs, we might as + * well set it. + */ + alias->cat7.alias_type_float = type_float(src_instr->cat1.dst_type); + } + + if (comp_src->flags & IR3_REG_CONST) { + /* alias.rt seems to read const registers (as opposed to storing a + * reference in the alias table) so we have to make sure it's + * scheduled after const writes. + */ + alias->barrier_class = alias->barrier_conflict = + IR3_BARRIER_CONST_W; + } + + /* Nothing actually uses the alias.rt dst so make sure it doesn't get + * DCE'd. + */ + array_insert(shpe->block, shpe->block->keeps, alias); + + output->aliased_components |= (1 << comp); + progress = true; + } + + /* Remove the aliased components from the src so that they can be DCE'd. + */ + src->wrmask &= ~output->aliased_components; + + if (!src->wrmask) { + src->def = NULL; + } + } + + return progress; +} + +/* Replace const and immediate components of the RT sources of end with alias.rt + * instructions in the preamble. + */ +bool +ir3_create_alias_rt(struct ir3 *ir, struct ir3_shader_variant *v) +{ + if (!ir->compiler->has_alias) + return false; + if (ir3_shader_debug & IR3_DBG_NOALIASRT) + return false; + if (v->type != MESA_SHADER_FRAGMENT) + return false; + if (v->shader_options.fragdata_dynamic_remap) + return false; + + struct ir3_instruction *end = ir3_find_end(ir); + assert(end->opc == OPC_END); + return create_output_aliases(v, end); +} diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index a3f9ebd1433..131a3162169 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -35,6 +35,7 @@ static const struct debug_named_value shader_debug_options[] = { {"nodescprefetch", IR3_DBG_NODESCPREFETCH, "Disable descriptor prefetch optimization"}, {"expandrpt", IR3_DBG_EXPANDRPT, "Expand rptN instructions"}, {"noaliastex", IR3_DBG_NOALIASTEX, "Don't use alias.tex"}, + {"noaliasrt", IR3_DBG_NOALIASRT, "Don't use alias.rt"}, #if MESA_DEBUG /* MESA_DEBUG-only options: */ {"schedmsgs", IR3_DBG_SCHEDMSGS, "Enable scheduler debug messages"}, diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 1fc0f80f764..27a3627339a 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -346,6 +346,7 @@ enum ir3_shader_debug { IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20), IR3_DBG_RAMSGS = BITFIELD_BIT(21), IR3_DBG_NOALIASTEX = BITFIELD_BIT(22), + IR3_DBG_NOALIASRT = BITFIELD_BIT(23), }; extern enum ir3_shader_debug ir3_shader_debug; diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 4d092573fbe..7200250fed3 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -5797,6 +5797,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, } while (progress); progress = IR3_PASS(ir, ir3_create_alias_tex_regs); + progress |= IR3_PASS(ir, ir3_create_alias_rt, so); if (progress) { IR3_PASS(ir, ir3_dce, so);