From dfc13fcf9f2df74bfc008f41579f07157fe26b0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?=
Date: Thu, 4 Jul 2024 11:03:43 +0200
Subject: [PATCH] aco: introduce Operand flag 'CopyKill'

This flag indicates that the Operand must be copied in order to
satisfy register constraints. The copy is immediately killed by
the instruction.

Part-of:
---
 src/amd/compiler/aco_ir.h                  | 18 ++++++++++++++++--
 src/amd/compiler/aco_live_var_analysis.cpp | 19 +++++++++++++++++--
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 7e8168f1185..83cfbb6ac02 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -460,7 +460,7 @@ public:
    constexpr Operand()
       : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false), isKill_(false),
         isUndef_(true), isFirstKill_(false), constSize(0), isLateKill_(false), isClobbered_(false),
-        is16bit_(false), is24bit_(false), signext(false)
+        isCopyKill_(false), is16bit_(false), is24bit_(false), signext(false)
    {}
 
    explicit Operand(Temp r) noexcept
@@ -809,11 +809,24 @@ public:
    constexpr void setClobbered(bool flag) noexcept { isClobbered_ = flag; }
    constexpr bool isClobbered() const noexcept { return isClobbered_; }
 
+   /* Indicates that the Operand must be copied in order to satisfy register
+    * constraints. The copy is immediately killed by the instruction.
+    */
+   constexpr void setCopyKill(bool flag) noexcept
+   {
+      isCopyKill_ = flag;
+      if (flag)
+         setKill(flag);
+   }
+   constexpr bool isCopyKill() const noexcept { return isCopyKill_; }
+
    constexpr void setKill(bool flag) noexcept
    {
       isKill_ = flag;
-      if (!flag)
+      if (!flag) {
          setFirstKill(false);
+         setCopyKill(false);
+      }
    }
 
    constexpr bool isKill() const noexcept { return isKill_ || isFirstKill(); }
@@ -879,6 +892,7 @@ private:
          uint8_t constSize : 2;
          uint8_t isLateKill_ : 1;
          uint8_t isClobbered_ : 1;
+         uint8_t isCopyKill_ : 1;
          uint8_t is16bit_ : 1;
          uint8_t is24bit_ : 1;
          uint8_t signext : 1;
diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp
index bc58f176c94..14c6e2f4745 100644
--- a/src/amd/compiler/aco_live_var_analysis.cpp
+++ b/src/amd/compiler/aco_live_var_analysis.cpp
@@ -42,7 +42,7 @@ get_temp_registers(Instruction* instr)
    }
 
    for (Operand op : instr->operands) {
-      if (op.isFirstKill()) {
+      if (op.isFirstKill() || op.isCopyKill()) {
          demand_before += op.getTemp();
          if (op.isLateKill())
             demand_after += op.getTemp();
@@ -252,8 +252,12 @@ process_live_temps_per_block(live_ctx& ctx, Block* block)
          Operand& operand = insn->operands[i];
          if (!operand.isTemp())
             continue;
+
+         const Temp temp = operand.getTemp();
          if (operand.isFixed() && ctx.program->progress < CompilationProgress::after_ra) {
+            assert(!operand.isLateKill());
             ctx.program->needs_vcc |= operand.physReg() == vcc;
+
             /* Check if this operand gets overwritten by a precolored definition. */
             if (std::any_of(insn->definitions.begin(), insn->definitions.end(),
                             [=](Definition def)
@@ -263,8 +267,19 @@ process_live_temps_per_block(live_ctx& ctx, Block* block)
                                      operand.physReg() + operand.size() > def.physReg();
                             }))
                operand.setClobbered(true);
+
+            /* Check if this temp is fixed to a different register as well.
+             * This assumes that operands of one instruction are not precolored twice to
+             * the same register. In this case, register pressure might be overestimated.
+             */
+            for (unsigned j = i + 1; !operand.isCopyKill() && j < insn->operands.size(); ++j) {
+               if (insn->operands[j].isTemp() && insn->operands[j].getTemp() == temp &&
+                   insn->operands[j].isFixed()) {
+                  operand_demand += temp;
+                  insn->operands[j].setCopyKill(true);
+               }
+            }
          }
-         const Temp temp = operand.getTemp();
 
          if (operand.isKill())
             continue;