radeon/llvm: Lower loads from USE_SGPR address space during DAG lowering

Tom Stellard 2012-07-26 14:30:23 +00:00
parent 40c41fe890
commit 3aaa209293
5 changed files with 50 additions and 66 deletions
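
In short: loads from the USER_SGPR address space are no longer matched to the USE_SGPR_32 / USE_SGPR_64 pseudo instructions and expanded after instruction selection; LowerLOAD now catches them during DAG lowering and replaces each load with a live-in copy of the corresponding SGPR. The register index comes from the load's constant pointer value, as in the sketch below (a hypothetical helper, not code from the commit, assuming the pointer value is a dword index into the user SGPRs):

    #include <cassert>
    #include <cstdint>

    // Sketch of the pointer-to-register-index mapping LowerLOAD performs.
    // UserSgprRegIndex is a hypothetical name used only for illustration.
    static unsigned UserSgprRegIndex(uint64_t PtrValue, unsigned SizeInBits) {
      unsigned TypeDwordWidth = SizeInBits / 32;   // 1 for i32, 2 for i64
      // Same alignment rule LowerLOAD asserts on below.
      assert(PtrValue % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
      return PtrValue / TypeDwordWidth;            // index into SReg_32 or SReg_64
    }

For example, a 64-bit value at dword offset 2 maps to index 1 of SReg_64, while a 32-bit value at the same offset maps to index 2 of SReg_32.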

AMDGPUISelLowering.cpp

@@ -311,13 +311,6 @@ bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
return false;
}
void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI,
MachineFunction * MF, MachineRegisterInfo & MRI,
const TargetInstrInfo * TII, unsigned reg) const
{
AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, MI->getOperand(0).getReg());
}
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const {

AMDGPUISelLowering.h

@@ -29,16 +29,6 @@ private:
protected:
/// addLiveIn - This function adds reg to the live-in list of the entry block
/// and emits a copy from reg to MI.getOperand(0).
///
/// Some registers are loaded with values before the program
/// begins to execute. The loading of these values is modeled with pseudo
/// instructions which are lowered using this function.
void addLiveIn(MachineInstr * MI, MachineFunction * MF,
MachineRegisterInfo & MRI, const TargetInstrInfo * TII,
unsigned reg) const;
/// CreateLiveInRegister - Helper function that adds Reg to the LiveIn list
/// of the DAG's MachineFunction. This returns a Register SDNode representing
/// Reg.

SIISelLowering.cpp

@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "SIISelLowering.h"
#include "AMDIL.h"
#include "AMDILIntrinsicInfo.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
@@ -47,6 +48,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// We need to custom lower loads from the USER_SGPR address space, so we can
// add the SGPRs as livein registers.
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
@@ -126,11 +132,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::SI_V_CNDLT:
LowerSI_V_CNDLT(MI, *BB, I, MRI);
break;
case AMDGPU::USE_SGPR_32:
case AMDGPU::USE_SGPR_64:
lowerUSE_SGPR(MI, BB->getParent(), MRI);
MI->eraseFromParent();
break;
}
return BB;
}
@@ -209,21 +210,6 @@ void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
MI->eraseFromParent();
}
void SITargetLowering::lowerUSE_SGPR(MachineInstr *MI,
MachineFunction * MF, MachineRegisterInfo & MRI) const
{
const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
unsigned dstReg = MI->getOperand(0).getReg();
int64_t newIndex = MI->getOperand(1).getImm();
const TargetRegisterClass * dstClass = MRI.getRegClass(dstReg);
unsigned DwordWidth = dstClass->getSize() / 4;
assert(newIndex % DwordWidth == 0 && "USER_SGPR not properly aligned");
newIndex = newIndex / DwordWidth;
unsigned newReg = dstClass->getRegister(newIndex);
addLiveIn(MI, MF, MRI, TII, newReg);
}
EVT SITargetLowering::getSetCCResultType(EVT VT) const
{
return MVT::i1;
@@ -238,6 +224,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
case ISD::INTRINSIC_WO_CHAIN: {
@@ -303,6 +290,48 @@ SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
return Result;
}
SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
EVT VT = Op.getValueType();
LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op);
assert(Ptr);
unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace();
// We only need to lower USER_SGPR address space loads
if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) {
return SDValue();
}
// Loads from the USER_SGPR address space can only have constant value
// pointers.
ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr());
assert(BasePtr);
unsigned TypeDwordWidth = VT.getSizeInBits() / 32;
const TargetRegisterClass * dstClass;
switch (TypeDwordWidth) {
default:
assert(!"USER_SGPR value size not implemented");
return SDValue();
case 1:
dstClass = &AMDGPU::SReg_32RegClass;
break;
case 2:
dstClass = &AMDGPU::SReg_64RegClass;
break;
}
uint64_t Index = BasePtr->getZExtValue();
assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
unsigned SGPRIndex = Index / TypeDwordWidth;
unsigned Reg = dstClass->getRegister(SGPRIndex);
DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg,
VT));
return SDValue();
}
SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
SDValue LHS = Op.getOperand(0);

SIISelLowering.h

@@ -35,12 +35,11 @@ class SITargetLowering : public AMDGPUTargetLowering
MachineBasicBlock::iterator I) const;
void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void lowerUSE_SGPR(MachineInstr *MI, MachineFunction * MF,
MachineRegisterInfo & MRI) const;
SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG,
unsigned VCCNode) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
public:

SIInstructions.td

@@ -7,19 +7,6 @@
//
//===----------------------------------------------------------------------===//
def load_user_sgpr : PatFrag<(ops node:$ptr),
(load node:$ptr),
[{
const Value *Src = cast<LoadSDNode>(N)->getSrcValue();
if (Src) {
PointerType * PT = dyn_cast<PointerType>(Src->getType());
return PT && PT->getAddressSpace() == AMDGPUAS::USER_SGPR_ADDRESS;
}
return false;
}]
>;
def isSI : Predicate<"Subtarget.device()"
"->getGeneration() == AMDGPUDeviceInfo::HD7XXX">;
@@ -956,20 +943,6 @@ def SI_INTERP_CONST : InstSI <
imm:$attr, SReg_32:$params))]
>;
def USE_SGPR_32 : InstSI <
(outs SReg_32:$dst),
(ins i32imm:$src0),
"USE_SGPR_32",
[(set (i32 SReg_32:$dst), (load_user_sgpr imm:$src0))]
>;
def USE_SGPR_64 : InstSI <
(outs SReg_64:$dst),
(ins i32imm:$src0),
"USE_SGPR_64",
[(set (i64 SReg_64:$dst), (load_user_sgpr imm:$src0))]
>;
} // end usesCustomInserter
// SI Pseudo branch instructions. These are used by the CFG structurizer pass
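
For reference, the removed path worked the other way around: the load_user_sgpr pattern selected USE_SGPR_32 / USE_SGPR_64, and the custom inserter (lowerUSE_SGPR, via addLiveIn and AMDGPU::utilAddLiveIn) turned each pseudo into a live-in physical SGPR plus a copy into the pseudo's destination register. A rough sketch of what that expansion amounts to, assuming utilAddLiveIn behaves as its name suggests (the helper below is hypothetical, header paths as of LLVM 3.x):

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/Target/TargetInstrInfo.h"
    #include "llvm/Target/TargetOpcodes.h"
    using namespace llvm;

    // Rough sketch of the removed USE_SGPR_* expansion (illustration only,
    // not the commit's code).
    static void ExpandUseSgprPseudo(MachineInstr *MI, MachineFunction *MF,
                                    MachineRegisterInfo &MRI,
                                    const TargetInstrInfo *TII, unsigned PhysReg) {
      unsigned DstReg = MI->getOperand(0).getReg();  // vreg defined by the pseudo
      MRI.addLiveIn(PhysReg);                        // value is live-in at function entry
      MachineBasicBlock &Entry = MF->front();
      BuildMI(Entry, Entry.begin(), MI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), DstReg).addReg(PhysReg);
    }

With the DAG-level lowering, CreateLiveInRegister does the equivalent work before instruction selection, so the pseudos, the pattern fragment, and the custom-inserter case can all be removed.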