radeon/llvm: Lower branch/branch_cond into predicated jump

Signed-off-by: Tom Stellard <thomas.stellard@amd.com>
This commit is contained in:
Vincent Lejeune 2012-08-01 22:49:42 +02:00 committed by Tom Stellard
parent 6db2e9fdb0
commit 0eca5fd919
7 changed files with 278 additions and 145 deletions

View file

@ -97,124 +97,6 @@ bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
return false;
}
// Branch-analysis hook for the AMDGPU target.
//
// NOTE(review): the `return retVal;` placed right after the initialisation
// makes this function return true unconditionally; every statement after it
// is unreachable. This looks like the analysis was switched off on purpose --
// confirm before removing either the early return or the dead code.
//
// The comments below describe what the unreachable code would do.
bool AMDGPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
bool retVal = true;
return retVal;
// ---- unreachable from here on ----
MachineBasicBlock::iterator iter = MBB.begin();
if (!getNextBranchInstr(iter, MBB)) {
// No branch instruction was found in the block.
retVal = false;
} else {
MachineInstr *firstBranch = iter;
if (!getNextBranchInstr(++iter, MBB)) {
// Exactly one branch instruction in the block.
if (firstBranch->getOpcode() == AMDGPU::BRANCH) {
// Unconditional branch: report its target and erase the instruction.
TBB = firstBranch->getOperand(0).getMBB();
firstBranch->eraseFromParent();
retVal = false;
} else {
// Conditional branch: operand 0 is the taken target, operand 1 the
// condition. FBB is picked from the successor list: the second successor,
// or the first one when the second equals TBB.
TBB = firstBranch->getOperand(0).getMBB();
FBB = *(++MBB.succ_begin());
if (FBB == TBB) {
FBB = *(MBB.succ_begin());
}
Cond.push_back(firstBranch->getOperand(1));
retVal = false;
}
} else {
MachineInstr *secondBranch = iter;
if (!getNextBranchInstr(++iter, MBB)) {
// Exactly two branch instructions in the block.
if (secondBranch->getOpcode() == AMDGPU::BRANCH) {
// First branch supplies TBB and the condition, the trailing BRANCH
// supplies FBB and is then erased.
TBB = firstBranch->getOperand(0).getMBB();
Cond.push_back(firstBranch->getOperand(1));
FBB = secondBranch->getOperand(0).getMBB();
secondBranch->eraseFromParent();
retVal = false;
} else {
assert(0 && "Should not have two consecutive conditional branches");
}
} else {
// Three or more branch instructions: open the CFG viewer and assert.
MBB.getParent()->viewCFG();
assert(0 && "Should not have three branch instructions in"
" a single basic block");
retVal = false;
}
}
}
return retVal;
}
unsigned int AMDGPUInstrInfo::getBranchInstr(const MachineOperand &op) const {
const MachineInstr *MI = op.getParent();
switch (MI->getDesc().OpInfo->RegClass) {
default: // FIXME: fallthrough??
case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
};
}
// Append branch instruction(s) to the end of MBB.
//
//   MBB   block that receives the new instruction(s)
//   TBB   taken target; must not be null
//   FBB   optional second target, emitted as a trailing BRANCH
//   Cond  condition operands; when non-empty, Cond[0] is the tested register
//   DL    debug location attached to the new instructions
//
// Returns 1 when a single branch was appended. When FBB is given, both
// branches are appended and the function then asserts; with assertions
// compiled out it returns 0.
// NOTE(review): the two-branch path emits instructions and then reports the
// case as unsupported -- confirm which of the two is intended.
//
// The former loop that called Cond[x].getParent()->dump() was removed: it was
// debug output, and it dereferenced the parent pointer of operands that are
// copies and may no longer belong to a live instruction.
unsigned int
AMDGPUInstrInfo::InsertBranch(MachineBasicBlock &MBB,
                              MachineBasicBlock *TBB,
                              MachineBasicBlock *FBB,
                              const SmallVectorImpl<MachineOperand> &Cond,
                              DebugLoc DL) const
{
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  if (FBB == 0) {
    if (Cond.empty()) {
      // Unconditional branch to TBB.
      BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(TBB);
    } else {
      // Conditional branch to TBB on Cond[0].
      BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
        .addMBB(TBB).addReg(Cond[0].getReg());
    }
    return 1;
  } else {
    // Conditional branch to TBB followed by an unconditional branch to FBB.
    BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
      .addMBB(TBB).addReg(Cond[0].getReg());
    BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(FBB);
  }
  assert(0 && "Inserting two branches not supported");
  return 0;
}
// Erase the branch instructions that terminate MBB and return how many were
// erased (0, 1 or 2).
//
// The last instruction is erased when it is a BRANCH or any BRANCH_COND
// variant; after that, the new last instruction is erased only when it is a
// BRANCH_COND variant.
unsigned int AMDGPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  // Pass 1: the final instruction of the block.
  if (MBB.empty()) {
    return 0;
  }
  MachineBasicBlock::iterator Last = MBB.end();
  --Last;
  switch (Last->getOpcode()) {
  default:
    return 0;
  ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
  case AMDGPU::BRANCH:
    Last->eraseFromParent();
    break;
  }

  // Pass 2: whatever instruction is last now.
  if (MBB.empty()) {
    return 1;
  }
  MachineBasicBlock::iterator Prev = MBB.end();
  --Prev;
  switch (Prev->getOpcode()) {
  // FIXME: only one case??
  default:
    return 1;
  ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
    Prev->eraseFromParent();
    break;
  }
  return 2;
}
MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator tmp = MBB->end();
if (!MBB->size()) {

View file

@ -49,8 +49,6 @@ private:
TargetMachine &TM;
// Branch-scanning helper used by AnalyzeBranch.
// NOTE(review): the definition is not fully visible here. Judging from the
// call sites, it appears to advance `iter` to the next branch instruction in
// MBB and return whether one was found -- confirm against the implementation.
bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
MachineBasicBlock &MBB) const;
// Returns AMDGPU::BRANCH_COND_f32 when the descriptor of the instruction that
// owns `op` reports the GPRF32 register class, and AMDGPU::BRANCH_COND_i32
// for GPRI32 and every other class.
unsigned int getBranchInstr(const MachineOperand &op) const;
public:
explicit AMDGPUInstrInfo(TargetMachine &tm);
@ -77,18 +75,6 @@ public:
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const;
// Branch-analysis hook: meant to report the taken target (TBB), the other
// target (FBB) and the branch condition (Cond) of MBB's terminators.
// NOTE(review): the current implementation returns true before doing any
// analysis, so none of the output parameters are ever written.
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const;
// Erases the trailing branch instruction(s) of MBB (BRANCH / BRANCH_COND
// variants) and returns the number erased: 0, 1 or 2.
unsigned RemoveBranch(MachineBasicBlock &MBB) const;
// Appends a branch to TBB at the end of MBB: a plain BRANCH when Cond is
// empty, otherwise a BRANCH_COND on Cond[0]. TBB must not be null.
// Returns 1 for a single branch. When FBB is non-null the implementation
// emits both branches and then asserts that two branches are unsupported.
unsigned
InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,

View file

@ -301,6 +301,7 @@ public:
bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
private:
// Walks backwards from the given position to the nearest AMDGPU::PRED_X and
// swaps the comparison held in its operand 2 (IS_ZERO <-> IS_NOT_ZERO, and
// likewise for the _INT variants).
void reversePredicateSetter(typename BlockT::iterator);
void orderBlocks();
void printOrderedBlocks(llvm::raw_ostream &OS);
int patternMatch(BlockT *CurBlock);
@ -1663,6 +1664,31 @@ void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
retireBlock(dstBlk, landBlk);
} //mergeLooplandBlock
// Invert the comparison of the predicate setter that feeds a branch.
//
// Starting with the instruction just before I, walks backwards through the
// block containing I until the nearest AMDGPU::PRED_X is found, then swaps
// the comparison stored in its operand 2:
//   OPCODE_IS_ZERO_INT <-> OPCODE_IS_NOT_ZERO_INT
//   OPCODE_IS_ZERO     <-> OPCODE_IS_NOT_ZERO
//
// I must refer to an instruction that lives in a block, because the block is
// reached through I->getParent().
//
// The walk stops at the first instruction of the block. The previous
// `while (I--)` form had no lower bound and stepped past the block start when
// no PRED_X was present. If no PRED_X is found, nothing is modified.
template<class PassT>
void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I)
{
  const typename BlockT::iterator Begin = I->getParent()->begin();
  while (I != Begin) {
    --I;
    MachineInstr *MI = static_cast<MachineInstr *>(I);
    if (MI->getOpcode() != AMDGPU::PRED_X) {
      continue;
    }
    switch (MI->getOperand(2).getImm()) {
    case OPCODE_IS_ZERO_INT:
      MI->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
      return;
    case OPCODE_IS_NOT_ZERO_INT:
      MI->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
      return;
    case OPCODE_IS_ZERO:
      MI->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
      return;
    case OPCODE_IS_NOT_ZERO:
      MI->getOperand(2).setImm(OPCODE_IS_ZERO);
      return;
    default:
      assert(0 && "PRED_X Opcode invalid!");
      // With assertions disabled, stop here instead of continuing the scan
      // and flipping an earlier, unrelated PRED_X.
      return;
    }
  }
}
template<class PassT>
void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
BlockT *exitBlk,
@ -1695,14 +1721,17 @@ void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
//break_logical
int newOpcode =
(trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode)
: CFGTraits::getBreakZeroOpcode(oldOpcode);
if (trueBranch != exitBlk) {
reversePredicateSetter(branchInstrPos);
}
int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
} else {
int newOpcode =
(trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
: CFGTraits::getBranchZeroOpcode(oldOpcode);
if (trueBranch != exitBlk) {
reversePredicateSetter(branchInstr);
}
int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
if (exitBlk != exitLandBlk) {
//splice is insert-before ...
@ -2765,7 +2794,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static int getBreakNzeroOpcode(int oldOpcode) {
switch(oldOpcode) {
ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALNZ);
case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32;
default:
assert(0 && "internal error");
};
@ -2774,7 +2803,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static int getBreakZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALZ);
case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32;
default:
assert(0 && "internal error");
};
@ -2783,6 +2812,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static int getBranchNzeroOpcode(int oldOpcode) {
switch(oldOpcode) {
case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32;
ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
default:
@ -2793,6 +2823,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static int getBranchZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32;
ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
default:
@ -2804,7 +2835,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static int getContinueNzeroOpcode(int oldOpcode)
{
switch(oldOpcode) {
ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALNZ);
case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
default:
assert(0 && "internal error");
};
@ -2813,7 +2844,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static int getContinueZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALZ);
case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
default:
assert(0 && "internal error");
};
@ -2845,6 +2876,8 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static bool isCondBranch(MachineInstr *instr) {
switch (instr->getOpcode()) {
case AMDGPU::JUMP:
return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
case AMDGPU::SI_IF_NZ:
case AMDGPU::SI_IF_Z:
@ -2857,8 +2890,8 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
static bool isUncondBranch(MachineInstr *instr) {
switch (instr->getOpcode()) {
case AMDGPU::BRANCH:
break;
case AMDGPU::JUMP:
return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
default:
return false;
}

View file

@ -217,7 +217,7 @@ include "AMDILIntrinsics.td"
// Custom Inserter for Branches and returns, this eventually will be a
// separate pass
//===---------------------------------------------------------------------===//
let isTerminator = 1 in {
let isTerminator = 1, usesCustomInserter = 1 in {
def BRANCH : ILFormat<(outs), (ins brtarget:$target),
"; Pseudo unconditional branch instruction",
[(br bb:$target)]>;

View file

@ -210,6 +210,33 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
.addReg(t1, RegState::Implicit);
break;
}
case AMDGPU::BRANCH:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
.addOperand(MI->getOperand(0))
.addReg(0);
break;
case AMDGPU::BRANCH_COND_f32:
MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
.addReg(AMDGPU::PREDICATE_BIT)
.addOperand(MI->getOperand(1))
.addImm(OPCODE_IS_ZERO);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
.addOperand(MI->getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
case AMDGPU::BRANCH_COND_i32:
MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
.addReg(AMDGPU::PREDICATE_BIT)
.addOperand(MI->getOperand(1))
.addImm(OPCODE_IS_ZERO_INT);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
.addOperand(MI->getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
}

View file

@ -17,6 +17,7 @@
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "AMDILUtilityFunctions.h"
#include "AMDGPUUtil.h"
#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"
@ -94,6 +95,8 @@ unsigned R600InstrInfo::getIEQOpcode() const
bool R600InstrInfo::isMov(unsigned Opcode) const
{
switch(Opcode) {
default: return false;
case AMDGPU::MOV:
@ -188,6 +191,199 @@ DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}
// Returns true only for AMDGPU::PRED_X, the single opcode this file treats
// as a predicate setter.
static bool
isPredicateSetter(unsigned opcode)
{
  return opcode == AMDGPU::PRED_X;
}
// Search backwards for the closest predicate setter located strictly before
// position I in MBB. The instruction at I itself is not examined.
// Returns the instruction found, or NULL when the start of the block is
// reached without a match.
static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I)
{
  for (MachineBasicBlock::iterator Stop = MBB.begin(); I != Stop; ) {
    MachineInstr *Candidate = --I;
    if (isPredicateSetter(Candidate->getOpcode())) {
      return Candidate;
    }
  }
  return NULL;
}
// Analyze the terminators of MBB. Most of the structure follows the ARM
// implementation of AnalyzeBranch.
//
// Returns false when the block ending was understood and true otherwise.
// On a false return:
//   - block does not end in a JUMP:     outputs untouched (falls through)
//   - one unpredicated JUMP:            TBB = its target
//   - one predicated JUMP:              TBB = its target, Cond filled
//   - predicated JUMP + plain JUMP:     TBB / FBB = their targets, Cond filled
// Cond receives, in order, operand 1 and operand 2 of the PRED_X found before
// the predicated JUMP, followed by a PRED_SEL_ONE register operand.
//
// The predicate setter is located with the bounded helper
// findFirstPredicateSetterFrom(). The earlier open-coded `--I` loops had no
// lower bound and walked past MBB.begin() when no PRED_X preceded the jump.
// That situation is now reported as "cannot analyze" (true).
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const
{
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
    return false;
  }

  // The last instruction of the block; known to be a JUMP from here on.
  MachineInstr *LastInst = I;
  const MachineBasicBlock::iterator LastPos = I;

  // Only one terminator: either LastInst is the first instruction of the
  // block or the instruction before it is not a JUMP.
  if (I == MBB.begin() ||
      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
    if (!isPredicated(LastInst)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, LastPos);
    if (!predSet)
      return true; // Predicated jump without a visible predicate setter.
    TBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Two trailing JUMPs; I now refers to the second-to-last one.
  MachineInstr *SecondLastInst = I;

  // Predicated jump followed by an unconditional jump (Bcc + B).
  if (isPredicated(SecondLastInst) && !isPredicated(LastInst)) {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    if (!predSet)
      return true; // Predicated jump without a visible predicate setter.
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
const MachineInstr *MI = op.getParent();
switch (MI->getDesc().OpInfo->RegClass) {
default: // FIXME: fallthrough??
case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
};
}
// Append JUMP instruction(s) to the end of MBB and return how many were added.
//
//   TBB   taken target; must not be null
//   FBB   optional second target, reached through a trailing plain JUMP
//   Cond  branch condition; when a predicated jump is emitted, Cond[1]
//         supplies the immediate written into operand 2 of the last
//         predicate setter in the block
//
// A plain jump carries register 0 as its predicate operand. A predicated jump
// reads PREDICATE_BIT (killed) and sets target flag (1<<4) on operand 1 of the
// predicate setter.
// NOTE(review): (1<<4) presumably matches MO_FLAG_PUSH -- confirm.
unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const
{
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  const bool HasFalseTarget = (FBB != 0);

  // Single unconditional jump.
  if (!HasFalseTarget && Cond.empty()) {
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
    return 1;
  }

  // Predicated jump to TBB, driven by the closest preceding predicate setter.
  MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
  assert(PredSet && "No previous predicate !");
  PredSet->getOperand(1).addTargetFlag(1<<4);
  PredSet->getOperand(2).setImm(Cond[1].getImm());
  BuildMI(&MBB, DL, get(AMDGPU::JUMP))
    .addMBB(TBB)
    .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
  if (!HasFalseTarget) {
    return 1;
  }

  // Trailing unconditional jump to FBB.
  BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
  return 2;
}
// Erase up to two trailing JUMP instructions from MBB and return the number
// erased (0, 1 or 2).
//
// For a predicated JUMP, bit (1<<4) is cleared from the target flags of
// operand 1 of the closest preceding predicate setter before the jump is
// erased.
//
// Note : we leave PRED* instructions there.
// They may be needed when predicating instructions.
//
// The predicate setter lookup may return NULL when the block holds no PRED_X
// before the jump. That case is now skipped instead of dereferencing NULL.
unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
{
  unsigned Removed = 0;
  while (Removed < 2) {
    MachineBasicBlock::iterator I = MBB.end();
    if (I == MBB.begin()) {
      break;
    }
    --I;
    if (I->getOpcode() != AMDGPU::JUMP) {
      break;
    }
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      if (predSet) {
        unsigned char flag =
            predSet->getOperand(1).getTargetFlags() & (~(1<<4));
        predSet->getOperand(1).setTargetFlags(flag);
      }
    }
    I->eraseFromParent();
    ++Removed;
  }
  return Removed;
}
bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const
{

View file

@ -34,6 +34,8 @@ namespace llvm {
const R600RegisterInfo RI;
AMDGPUTargetMachine &TM;
// Returns AMDGPU::BRANCH_COND_f32 when the descriptor of the instruction that
// owns `op` reports the GPRF32 register class, and AMDGPU::BRANCH_COND_i32
// for GPRI32 and every other class.
int getBranchInstr(const MachineOperand &op) const;
public:
explicit R600InstrInfo(AMDGPUTargetMachine &tm);
@ -62,6 +64,13 @@ namespace llvm {
DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const;
// Analyzes the JUMP terminators of MBB. Returns false when the block ending
// was understood (TBB, FBB and Cond are filled as applicable) and true when
// it cannot be handled. For a predicated JUMP, Cond receives operands 1 and 2
// of the preceding PRED_X followed by a PRED_SEL_ONE register operand.
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
// Appends JUMP instruction(s) to MBB: a plain JUMP to TBB when Cond is empty
// and FBB is null, otherwise a predicated JUMP to TBB, followed by a plain
// JUMP to FBB when FBB is non-null. Returns the number of jumps added (1 or 2).
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
// Erases up to two trailing JUMP instructions from MBB and returns the number
// erased. PRED_X instructions are left in place; for a predicated JUMP only
// the (1<<4) target flag on the setter's operand 1 is cleared.
unsigned RemoveBranch(MachineBasicBlock &MBB) const;
bool isPredicated(const MachineInstr *MI) const;
bool isPredicable(MachineInstr *MI) const;