From 2be37a91fac9b9fbfda2b17fba7653958abb51a2 Mon Sep 17 00:00:00 2001 From: Natalie Vock Date: Mon, 17 Feb 2025 18:42:49 +0100 Subject: [PATCH] aco/live_var_analysis: Handle calls Part-of: --- src/amd/compiler/aco_live_var_analysis.cpp | 42 +++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index 5571ef74442..a02c0e619e4 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -186,7 +186,8 @@ process_live_temps_per_block(live_ctx& ctx, Block* block) break; ctx.program->needs_vcc |= instr_needs_vcc(insn); - insn->register_demand = RegisterDemand(new_demand.vgpr, new_demand.sgpr); + RegisterDemand demand_after_instr = RegisterDemand(new_demand.vgpr, new_demand.sgpr); + insn->register_demand = demand_after_instr; bool has_vgpr_def = false; @@ -371,6 +372,45 @@ process_live_temps_per_block(live_ctx& ctx, Block* block) } } + if (insn->isCall()) { + /* For call instructions, definitions are live at the time s_setpc finishes, + * which continues execution in the callee. This means that all definitions are + * live concurrently with operands. + */ + operand_demand += insn->definitions[0].getTemp(); + if (insn->definitions[1].physReg() == vcc) + operand_demand += insn->definitions[1].getTemp(); + + RegisterDemand limit = get_addr_regs_from_waves(ctx.program, ctx.program->min_waves); + insn->call().callee_preserved_limit = RegisterDemand(); + + BITSET_DECLARE(preserved_regs, 512); + insn->call().abi.preservedRegisters(preserved_regs, limit); + for (auto& op : insn->operands) { + if (!op.isTemp() || !op.isPrecolored() || op.isClobbered()) + continue; + + if (op.isKill()) + insn->call().callee_preserved_limit -= op.getTemp(); + for (unsigned i = 0; i < op.size(); ++i) + BITSET_SET(preserved_regs, op.physReg().reg() + i); + } + + RegisterDemand preserved_reg_demand; + preserved_reg_demand.sgpr = + __bitset_prefix_sum(preserved_regs, limit.sgpr, 256 / BITSET_WORDBITS); + preserved_reg_demand.vgpr = __bitset_prefix_sum(preserved_regs + 256 / BITSET_WORDBITS, + limit.vgpr, 256 / BITSET_WORDBITS); + insn->call().callee_preserved_limit += preserved_reg_demand; + + insn->call().caller_preserved_demand = demand_after_instr; + + for (unsigned i = 0; i < insn->definitions.size(); ++i) { + if (!insn->definitions[i].isKill()) + insn->call().caller_preserved_demand -= insn->definitions[i].getTemp(); + } + } + operand_demand += new_demand; insn->register_demand.update(operand_demand); block->register_demand.update(insn->register_demand);