From 0aa9678d4d0ea0303e907bd3a4b362c08121ae89 Mon Sep 17 00:00:00 2001
From: Job Noorman
Date: Wed, 22 Jan 2025 15:33:48 +0100
Subject: [PATCH] ir3: add support for alias.rt

a7xx introduced support for aliasing render target components using
alias.rt. This allows components to be bound to uniform (const or
immediate) values in the preamble:

alias.rt.f32.0 rt0.y, c0.x
alias.rt.f32.0 rt1.z, (1.000000)

This aliases the 2nd component of RT0 to c0.x and the 3rd component of
RT1 to the immediate 1.0. All components of all 8 render targets can be
aliased.

This is implemented by replacing const and immediate components of the
RT sources of end with alias.rt instructions in the preamble. If no
preamble exists, an empty one is created.

Signed-off-by: Job Noorman
Part-of:
---
 src/freedreno/ir3/ir3.h              |   1 +
 src/freedreno/ir3/ir3_alias.c        | 123 +++++++++++++++++++++++++++
 src/freedreno/ir3/ir3_compiler.c     |   1 +
 src/freedreno/ir3/ir3_compiler.h     |   1 +
 src/freedreno/ir3/ir3_compiler_nir.c |   1 +
 5 files changed, 127 insertions(+)

diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 12eb373b045..2bcaa274fac 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -2272,6 +2272,7 @@ bool ir3_merge_rpt(struct ir3 *ir, struct ir3_shader_variant *v);
 bool ir3_opt_predicates(struct ir3 *ir, struct ir3_shader_variant *v);
 bool ir3_create_alias_tex_regs(struct ir3 *ir);
 bool ir3_insert_alias_tex(struct ir3 *ir);
+bool ir3_create_alias_rt(struct ir3 *ir, struct ir3_shader_variant *v);
 
 /* unreachable block elimination: */
 bool ir3_remove_unreachable(struct ir3 *ir);
diff --git a/src/freedreno/ir3/ir3_alias.c b/src/freedreno/ir3/ir3_alias.c
index b0aeea9daa8..406bd53c64d 100644
--- a/src/freedreno/ir3/ir3_alias.c
+++ b/src/freedreno/ir3/ir3_alias.c
@@ -637,3 +637,126 @@ ir3_insert_alias_tex(struct ir3 *ir)
 
    return progress;
 }
+/* Find the shader's shpe; if absent, create an empty preamble to get one. */
+static struct ir3_instruction *
+get_or_create_shpe(struct ir3 *ir)
+{
+   struct ir3_instruction *shpe = ir3_find_shpe(ir);
+
+   if (!shpe) {
+      shpe = ir3_create_empty_preamble(ir);
+      assert(shpe);
+   }
+
+   return shpe;
+}
+/* Alias const/immed components of end's RT sources; true if any was aliased. */
+static bool
+create_output_aliases(struct ir3_shader_variant *v, struct ir3_instruction *end)
+{
+   bool progress = false;
+   struct ir3_instruction *shpe = NULL; /* looked up lazily, on first alias */
+
+   foreach_src_n (src, src_n, end) {
+      struct ir3_shader_output *output = &v->outputs[end->end.outidxs[src_n]];
+
+      if (output->slot < FRAG_RESULT_DATA0 ||
+          output->slot > FRAG_RESULT_DATA7) {
+         continue; /* not one of the 8 render target outputs */
+      }
+
+      assert(src->flags & IR3_REG_SSA);
+      struct ir3_instruction *src_instr = src->def->instr;
+
+      if (src_instr->opc != OPC_META_COLLECT && src_instr->opc != OPC_MOV) {
+         continue; /* only collects and movs are inspected */
+      }
+
+      unsigned rt = output->slot - FRAG_RESULT_DATA0; /* render target index */
+
+      foreach_src_n (comp_src, comp, src_instr) {
+         if (!(comp_src->flags & (IR3_REG_IMMED | IR3_REG_CONST))) {
+            /* Only const and immediate values can be aliased. */
+            continue;
+         }
+
+         if ((comp_src->flags & IR3_REG_HALF) &&
+             (comp_src->flags & IR3_REG_CONST)) {
+            /* alias.rt doesn't seem to work with half const.
+             * TODO figure out what's going wrong here. Might just be
+             * unsupported because the blob only uses it in one CTS test.
+             */
+            continue;
+         }
+
+         if (!shpe) {
+            shpe = get_or_create_shpe(v->ir);
+         }
+
+         struct ir3_instruction *alias =
+            ir3_instr_create_at(ir3_before_instr(shpe), OPC_ALIAS, 1, 2);
+         alias->cat7.alias_scope = ALIAS_RT;
+         ir3_dst_create(alias, regid(rt, comp), IR3_REG_RT); /* dst: rt<rt>.<comp> */
+
+         unsigned src_flags =
+            comp_src->flags & (IR3_REG_HALF | IR3_REG_CONST | IR3_REG_IMMED);
+         ir3_src_create(alias, comp_src->num, src_flags)->uim_val =
+            comp_src->uim_val;
+
+         if (src_instr->opc == OPC_MOV) {
+            /* The float type bit seems entirely optional (i.e., it only affects
+             * disassembly) but since we have this info for movs, we might as
+             * well set it.
+             */
+            alias->cat7.alias_type_float = type_float(src_instr->cat1.dst_type);
+         }
+
+         if (comp_src->flags & IR3_REG_CONST) {
+            /* alias.rt seems to read const registers (as opposed to storing a
+             * reference in the alias table) so we have to make sure it's
+             * scheduled after const writes.
+             */
+            alias->barrier_class = alias->barrier_conflict =
+               IR3_BARRIER_CONST_W;
+         }
+
+         /* Nothing actually uses the alias.rt dst so make sure it doesn't get
+          * DCE'd.
+          */
+         array_insert(shpe->block, shpe->block->keeps, alias);
+
+         output->aliased_components |= (1 << comp); /* remember it on the output */
+         progress = true;
+      }
+
+      /* Remove the aliased components from the src so that they can be DCE'd.
+       */
+      src->wrmask &= ~output->aliased_components;
+
+      if (!src->wrmask) {
+         src->def = NULL; /* no component of this src is left in the mask */
+      }
+   }
+
+   return progress;
+}
+
+/* Replace const and immediate components of the RT sources of end with alias.rt
+ * instructions in the preamble. Returns true if any component was aliased.
+ */
+bool
+ir3_create_alias_rt(struct ir3 *ir, struct ir3_shader_variant *v)
+{
+   if (!ir->compiler->has_alias)
+      return false;
+   if (ir3_shader_debug & IR3_DBG_NOALIASRT)
+      return false; /* disabled through the "noaliasrt" debug option */
+   if (v->type != MESA_SHADER_FRAGMENT)
+      return false; /* only fragment shader variants are processed */
+   if (v->shader_options.fragdata_dynamic_remap)
+      return false;
+
+   struct ir3_instruction *end = ir3_find_end(ir);
+   assert(end->opc == OPC_END);
+   return create_output_aliases(v, end);
+}
diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c
index a3f9ebd1433..131a3162169 100644
--- a/src/freedreno/ir3/ir3_compiler.c
+++ b/src/freedreno/ir3/ir3_compiler.c
@@ -35,6 +35,7 @@ static const struct debug_named_value shader_debug_options[] = {
    {"nodescprefetch", IR3_DBG_NODESCPREFETCH, "Disable descriptor prefetch optimization"},
    {"expandrpt",      IR3_DBG_EXPANDRPT,      "Expand rptN instructions"},
    {"noaliastex",     IR3_DBG_NOALIASTEX,     "Don't use alias.tex"},
+   {"noaliasrt",      IR3_DBG_NOALIASRT,      "Don't use alias.rt"},
 #if MESA_DEBUG
    /* MESA_DEBUG-only options: */
    {"schedmsgs",      IR3_DBG_SCHEDMSGS,      "Enable scheduler debug messages"},
diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h
index 1fc0f80f764..27a3627339a 100644
--- a/src/freedreno/ir3/ir3_compiler.h
+++ b/src/freedreno/ir3/ir3_compiler.h
@@ -346,6 +346,7 @@ enum ir3_shader_debug {
    IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20),
    IR3_DBG_RAMSGS = BITFIELD_BIT(21),
    IR3_DBG_NOALIASTEX = BITFIELD_BIT(22),
+   IR3_DBG_NOALIASRT = BITFIELD_BIT(23),
 };
 
 extern enum ir3_shader_debug ir3_shader_debug;
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 4d092573fbe..7200250fed3 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -5797,6 +5797,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
    } while (progress);
 
    progress = IR3_PASS(ir, ir3_create_alias_tex_regs);
+   progress |= IR3_PASS(ir, ir3_create_alias_rt, so);
 
    if (progress) {
       IR3_PASS(ir, ir3_dce, so);