radeon/llvm: Remove backend code from Mesa

This code now lives in an external tree.

For the next Mesa release, fetch the code from the master branch
of this LLVM repo:
http://cgit.freedesktop.org/~tstellar/llvm/

For all subsequent Mesa releases, fetch the code from the official LLVM
project:
www.llvm.org
Tom Stellard 2013-01-04 15:38:37 +00:00
parent 05c143cc04
commit aed37cbee8
99 changed files with 0 additions and 19168 deletions


@@ -1,18 +0,0 @@
AMDGPUInstrEnums.h.include
AMDGPUInstrEnums.include
AMDGPUInstrEnums.td
AMDILGenAsmWriter.inc
AMDILGenCallingConv.inc
AMDILGenCodeEmitter.inc
AMDILGenDAGISel.inc
AMDILGenEDInfo.inc
AMDILGenInstrInfo.inc
AMDILGenIntrinsics.inc
AMDILGenRegisterInfo.inc
AMDILGenSubtargetInfo.inc
R600HwRegInfo.include
R600Intrinsics.td
R600RegisterInfo.td
SIRegisterGetHWRegNum.inc
SIRegisterInfo.td
loader


@@ -1,46 +0,0 @@
//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPU_H
#define AMDGPU_H
#include "AMDGPUTargetMachine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
class FunctionPass;
class AMDGPUTargetMachine;
// R600 Passes
FunctionPass* createR600KernelParametersPass(const TargetData* TD);
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
// SI Passes
FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
FunctionPass *createSILowerFlowControlPass(TargetMachine &tm);
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
// Passes common to R600 and SI
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
} // End namespace llvm
namespace ShaderType {
enum Type {
PIXEL = 0,
VERTEX = 1,
GEOMETRY = 2,
COMPUTE = 3
};
}
#endif // AMDGPU_H


@@ -1,38 +0,0 @@
//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
// Include AMDIL TD files
include "AMDILBase.td"
def AMDGPUInstrInfo : InstrInfo {}
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
def AMDGPUAsmWriter : AsmWriter {
string AsmWriterClassName = "InstPrinter";
int Variant = 0;
bit isMCAsmWriter = 1;
}
def AMDGPU : Target {
// Pull in Instruction Info:
let InstructionSet = AMDGPUInstrInfo;
let AssemblyWriters = [AMDGPUAsmWriter];
}
// Include AMDGPU TD files
include "R600Schedule.td"
include "SISchedule.td"
include "Processors.td"
include "AMDGPUInstrInfo.td"
include "AMDGPUIntrinsics.td"
include "AMDGPURegisterInfo.td"
include "AMDGPUInstructions.td"


@@ -1,134 +0,0 @@
//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The AMDGPUAsmPrinter is used to print both the assembly string and the
// binary code. When passed an MCAsmStreamer it prints assembly, and when
// passed an MCObjectStreamer it outputs binary code.
//
//===----------------------------------------------------------------------===//
//
#include "AMDGPUAsmPrinter.h"
#include "AMDGPU.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
MCStreamer &Streamer) {
return new AMDGPUAsmPrinter(tm, Streamer);
}
extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
}
/// runOnMachineFunction - We need to override this function so we can avoid
/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
if (STM.dumpCode()) {
MF.dump();
}
SetupMachineFunction(MF);
if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
EmitProgramInfo(MF);
}
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
EmitFunctionBody();
return false;
}
void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
unsigned MaxSGPR = 0;
unsigned MaxVGPR = 0;
bool VCCUsed = false;
const SIRegisterInfo * RI =
static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
MachineInstr &MI = *I;
unsigned numOperands = MI.getNumOperands();
for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
MachineOperand & MO = MI.getOperand(op_idx);
unsigned maxUsed;
unsigned width = 0;
bool isSGPR = false;
unsigned reg;
unsigned hwReg;
if (!MO.isReg()) {
continue;
}
reg = MO.getReg();
if (reg == AMDGPU::VCC) {
VCCUsed = true;
continue;
}
switch (reg) {
default: break;
case AMDGPU::EXEC:
case AMDGPU::SI_LITERAL_CONSTANT:
case AMDGPU::SREG_LIT_0:
case AMDGPU::M0:
continue;
}
if (AMDGPU::SReg_32RegClass.contains(reg)) {
isSGPR = true;
width = 1;
} else if (AMDGPU::VReg_32RegClass.contains(reg)) {
isSGPR = false;
width = 1;
} else if (AMDGPU::SReg_64RegClass.contains(reg)) {
isSGPR = true;
width = 2;
} else if (AMDGPU::VReg_64RegClass.contains(reg)) {
isSGPR = false;
width = 2;
} else if (AMDGPU::SReg_128RegClass.contains(reg)) {
isSGPR = true;
width = 4;
} else if (AMDGPU::VReg_128RegClass.contains(reg)) {
isSGPR = false;
width = 4;
} else if (AMDGPU::SReg_256RegClass.contains(reg)) {
isSGPR = true;
width = 8;
} else {
assert(!"Unknown register class");
}
hwReg = RI->getHWRegNum(reg);
maxUsed = hwReg + width - 1;
if (isSGPR) {
MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
} else {
MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
}
}
}
}
if (VCCUsed) {
MaxSGPR += 2;
}
SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
}
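The register-counting loop above boils down to simple interval arithmetic. Below is a self-contained C++ model (names like RegUse and countRegs are invented for illustration, not part of this backend) of how MaxSGPR/MaxVGPR fall out of the operand scan and the VCC adjustment:

#include <algorithm>
#include <cstdio>

// One register operand after the register-class ladder above has classified
// it: first hardware register, how many consecutive registers it covers
// (1, 2, 4 or 8), and whether it is scalar (SGPR) or vector (VGPR).
struct RegUse {
  unsigned HWReg;
  unsigned Width;
  bool IsSGPR;
};

// Mirrors EmitProgramInfo's bookkeeping: the highest register an operand
// touches is HWReg + Width - 1, and using VCC reserves two extra SGPRs.
static void countRegs(const RegUse *Uses, unsigned N, bool VCCUsed,
                      unsigned &MaxSGPR, unsigned &MaxVGPR) {
  MaxSGPR = MaxVGPR = 0;
  for (unsigned i = 0; i < N; ++i) {
    unsigned MaxUsed = Uses[i].HWReg + Uses[i].Width - 1;
    if (Uses[i].IsSGPR)
      MaxSGPR = std::max(MaxSGPR, MaxUsed);
    else
      MaxVGPR = std::max(MaxVGPR, MaxUsed);
  }
  if (VCCUsed)
    MaxSGPR += 2;
}

int main() {
  const RegUse Uses[] = {{0, 4, true}, {6, 1, true}, {2, 2, false}};
  unsigned MaxSGPR, MaxVGPR;
  countRegs(Uses, 3, /*VCCUsed=*/true, MaxSGPR, MaxVGPR);
  // SGPR: max(0+4-1, 6+1-1) = 6, +2 for VCC = 8; VGPR: 2+2-1 = 3.
  // Like the streamer above, report counts rather than highest indices.
  std::printf("SGPRs: %u, VGPRs: %u\n", MaxSGPR + 1, MaxVGPR + 1); // 9 and 4
}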


@@ -1,43 +0,0 @@
//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// AMDGPU Assembly printer class.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPU_ASMPRINTER_H
#define AMDGPU_ASMPRINTER_H
#include "llvm/CodeGen/AsmPrinter.h"
namespace llvm {
class AMDGPUAsmPrinter : public AsmPrinter {
public:
explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const {
return "AMDGPU Assembly Printer";
}
/// EmitProgramInfo - Emit register usage information so that the GPU driver
/// can correctly setup the GPU state.
void EmitProgramInfo(MachineFunction &MF);
/// EmitInstruction - Implemented in AMDGPUMCInstLower.cpp
virtual void EmitInstruction(const MachineInstr *MI);
};
} // End namespace llvm
#endif //AMDGPU_ASMPRINTER_H


@@ -1,48 +0,0 @@
//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// CodeEmitter interface for R600 and SI codegen.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPUCODEEMITTER_H
#define AMDGPUCODEEMITTER_H
namespace llvm {
class AMDGPUCodeEmitter {
public:
uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
virtual uint64_t getMachineOpValue(const MachineInstr &MI,
const MachineOperand &MO) const { return 0; }
virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
unsigned OpNo) const {
return 0;
}
virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
unsigned OpNo) const {
return 0;
}
virtual uint64_t VOPPostEncode(const MachineInstr &MI,
uint64_t Value) const {
return Value;
}
virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
unsigned OpNo) const {
return 0;
}
virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
const {
return 0;
}
};
} // End namespace llvm
#endif // AMDGPUCODEEMITTER_H


@@ -1,62 +0,0 @@
//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass lowers AMDIL machine instructions to the appropriate hardware
// instructions.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
namespace {
class AMDGPUConvertToISAPass : public MachineFunctionPass {
private:
static char ID;
TargetMachine &TM;
public:
AMDGPUConvertToISAPass(TargetMachine &tm) :
MachineFunctionPass(ID), TM(tm) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
};
} // End anonymous namespace
char AMDGPUConvertToISAPass::ID = 0;
FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
return new AMDGPUConvertToISAPass(tm);
}
bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
{
const AMDGPUInstrInfo * TII =
static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
MachineInstr &MI = *I;
TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
}
}
return false;
}


@@ -1,351 +0,0 @@
//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This is the parent TargetLowering class for hardware code gen targets.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUISelLowering.h"
#include "AMDILIntrinsicInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
using namespace llvm;
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
TargetLowering(TM, new TargetLoweringObjectFileELF())
{
// Initialize target lowering borrowed from AMDIL
InitAMDILLowering();
// We need to custom lower some of the intrinsics
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// Library functions. These default to Expand, but we have instructions
// for them.
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FEXP2, MVT::f32, Legal);
setOperationAction(ISD::FPOW, MVT::f32, Legal);
setOperationAction(ISD::FLOG2, MVT::f32, Legal);
setOperationAction(ISD::FABS, MVT::f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UREM, MVT::i32, Expand);
}
//===---------------------------------------------------------------------===//
// TargetLowering Callbacks
//===---------------------------------------------------------------------===//
SDValue AMDGPUTargetLowering::LowerFormalArguments(
SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const
{
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
InVals.push_back(SDValue());
}
return Chain;
}
SDValue AMDGPUTargetLowering::LowerReturn(
SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc DL, SelectionDAG &DAG) const
{
return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
}
//===---------------------------------------------------------------------===//
// Target specific lowering
//===---------------------------------------------------------------------===//
SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
const
{
switch (Op.getOpcode()) {
default:
Op.getNode()->dump();
assert(0 && "Custom lowering code for this"
"instruction is not implemented yet!");
break;
// AMDIL DAG lowering
case ISD::SDIV: return LowerSDIV(Op, DAG);
case ISD::SREM: return LowerSREM(Op, DAG);
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
// AMDGPU DAG lowering
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
}
return Op;
}
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const
{
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
switch (IntrinsicID) {
default: return Op;
case AMDGPUIntrinsic::AMDIL_abs:
return LowerIntrinsicIABS(Op, DAG);
case AMDGPUIntrinsic::AMDIL_exp:
return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_lrp:
return LowerIntrinsicLRP(Op, DAG);
case AMDGPUIntrinsic::AMDIL_fraction:
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDIL_mad:
return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
case AMDGPUIntrinsic::AMDIL_max:
return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_imax:
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_umax:
return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDIL_min:
return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_imin:
return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_umin:
return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDIL_round_nearest:
return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
}
}
/// IABS(a) = SMAX(sub(0, a), a)
SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
Op.getOperand(1));
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
}
/// Linear Interpolation
/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
DAG.getConstantFP(1.0f, MVT::f32),
Op.getOperand(1));
SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
Op.getOperand(3));
return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
Op.getOperand(2),
OneSubAC);
}
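As a quick numeric sanity check of the identity in the comment, here is a standalone sketch (plain C++, not the DAG code itself):

#include <cassert>

// LRP(a, b, c) = a*b + (1 - a)*c, i.e. mad(a, b, (1 - a)*c), exactly the
// shape the lowering above builds with FSUB/FMUL/MAD.
static float lrp(float a, float b, float c) {
  return a * b + (1.0f - a) * c;
}

int main() {
  // a = 0.25 blends 25% of b = 10 with 75% of c = 20: 2.5 + 15 = 17.5.
  assert(lrp(0.25f, 10.0f, 20.0f) == 17.5f);
}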
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
SDValue Num = Op.getOperand(0);
SDValue Den = Op.getOperand(1);
SmallVector<SDValue, 8> Results;
// RCP = URECIP(Den) = 2^32 / Den + e
// e is rounding error.
SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
// RCP_LO = umulo(RCP, Den)
SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
// RCP_HI = mulhu(RCP, Den)
SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
// NEG_RCP_LO = -RCP_LO
SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
RCP_LO);
// ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
NEG_RCP_LO, RCP_LO,
ISD::SETEQ);
// Calculate the rounding error from the URECIP instruction
// E = mulhu(ABS_RCP_LO, RCP)
SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
// RCP_A_E = RCP + E
SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
// RCP_S_E = RCP - E
SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
// Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
RCP_A_E, RCP_S_E,
ISD::SETEQ);
// Quotient = mulhu(Tmp0, Num)
SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
// Num_S_Remainder = Quotient * Den
SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
// Remainder = Num - Num_S_Remainder
SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
// Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
DAG.getConstant(-1, VT),
DAG.getConstant(0, VT),
ISD::SETGE);
// Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
DAG.getConstant(0, VT),
DAG.getConstant(-1, VT),
DAG.getConstant(0, VT),
ISD::SETGE);
// Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
Remainder_GE_Zero);
// Calculate Division result:
// Quotient_A_One = Quotient + 1
SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
DAG.getConstant(1, VT));
// Quotient_S_One = Quotient - 1
SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
DAG.getConstant(1, VT));
// Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
Quotient, Quotient_A_One, ISD::SETEQ);
// Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
Quotient_S_One, Div, ISD::SETEQ);
// Calculate Rem result:
// Remainder_S_Den = Remainder - Den
SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
// Remainder_A_Den = Remainder + Den
SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
// Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
Remainder, Remainder_S_Den, ISD::SETEQ);
// Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
Remainder_A_Den, Rem, ISD::SETEQ);
DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div);
DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem);
return Op;
}
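The comment trail in LowerUDIVREM is easier to follow against a plain-integer model. The sketch below is only an approximation: URECIP is modeled as floor(2^32 / Den), and the compare/select fix-ups are replaced by a correction loop, since this software reciprocal is not bit-exact with the hardware one:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Software model of the expanded UDIVREM: estimate the quotient with a
// 32-bit fixed-point reciprocal, then correct the underestimate.
static void udivrem32(uint32_t Num, uint32_t Den,
                      uint32_t &Div, uint32_t &Rem) {
  assert(Den != 0);
  uint32_t RCP = (uint32_t)((1ULL << 32) / Den);               // ~URECIP(Den)
  uint32_t Quotient = (uint32_t)(((uint64_t)RCP * Num) >> 32); // mulhu
  uint32_t Remainder = Num - Quotient * Den;
  while (Remainder >= Den) { // the selects above do this in straight-line code
    Remainder -= Den;
    ++Quotient;
  }
  Div = Quotient;
  Rem = Remainder;
}

int main() {
  uint32_t D, R;
  udivrem32(1000000007u, 97u, D, R);
  assert(D == 1000000007u / 97u && R == 1000000007u % 97u);
  std::printf("q=%u r=%u\n", D, R);
}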
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
{
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
return CFP->isExactlyValue(1.0);
}
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
return C->isAllOnesValue();
}
return false;
}
bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
{
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
return CFP->getValueAPF().isZero();
}
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
return C->isNullValue();
}
return false;
}
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned VirtualRegister;
if (!MRI.isLiveIn(Reg)) {
VirtualRegister = MRI.createVirtualRegister(RC);
MRI.addLiveIn(Reg, VirtualRegister);
} else {
VirtualRegister = MRI.getLiveInVirtReg(Reg);
}
return DAG.getRegister(VirtualRegister, VT);
}
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
switch (Opcode) {
default: return 0;
// AMDIL DAG nodes
NODE_NAME_CASE(MAD);
NODE_NAME_CASE(CALL);
NODE_NAME_CASE(UMUL);
NODE_NAME_CASE(DIV_INF);
NODE_NAME_CASE(RET_FLAG);
NODE_NAME_CASE(BRANCH_COND);
// AMDGPU DAG nodes
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(FMAX)
NODE_NAME_CASE(SMAX)
NODE_NAME_CASE(UMAX)
NODE_NAME_CASE(FMIN)
NODE_NAME_CASE(SMIN)
NODE_NAME_CASE(UMIN)
NODE_NAME_CASE(URECIP)
NODE_NAME_CASE(INTERP)
NODE_NAME_CASE(INTERP_P0)
}
}


@@ -1,142 +0,0 @@
//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the interface definition of the TargetLowering class
// that is common to all AMD GPUs.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPUISELLOWERING_H
#define AMDGPUISELLOWERING_H
#include "llvm/Target/TargetLowering.h"
namespace llvm {
class MachineRegisterInfo;
class AMDGPUTargetLowering : public TargetLowering
{
private:
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
protected:
/// CreateLiveInRegister - Helper function that adds Reg to the LiveIn list
/// of the DAG's MachineFunction. This returns a Register SDNode representing
/// Reg.
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const;
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
public:
AMDGPUTargetLowering(TargetMachine &TM);
virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc DL, SelectionDAG &DAG) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
virtual const char* getTargetNodeName(unsigned Opcode) const;
// Functions defined in AMDILISelLowering.cpp
public:
/// computeMaskedBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth = 0) const;
virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I, unsigned Intrinsic) const;
/// isFPImmLegal - We want to mark f32/f64 floating point values as legal.
bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
/// ShouldShrinkFPConstant - We don't want to shrink f64/f32 constants.
bool ShouldShrinkFPConstant(EVT VT) const;
private:
void InitAMDILLowering();
SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
};
namespace AMDGPUISD
{
enum
{
// AMDIL ISD Opcodes
FIRST_NUMBER = ISD::BUILTIN_OP_END,
MAD, // 32bit Fused Multiply Add instruction
CALL, // Function call based on a single integer
UMUL, // 32bit unsigned multiplication
DIV_INF, // Divide with infinity returned on zero divisor
RET_FLAG,
BRANCH_COND,
// End AMDIL ISD Opcodes
BITALIGN,
FRACT,
FMAX,
SMAX,
UMAX,
FMIN,
SMIN,
UMIN,
URECIP,
INTERP,
INTERP_P0,
LAST_AMDGPU_ISD_NUMBER
};
} // End namespace AMDGPUISD
namespace SIISD {
enum {
SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
VCC_AND,
VCC_BITCAST
};
} // End namespace SIISD
} // End namespace llvm
#endif // AMDGPUISELLOWERING_H


@@ -1,258 +0,0 @@
//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the TargetInstrInfo class that is
// common to all AMD GPUs.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDIL.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#define GET_INSTRINFO_CTOR
#include "AMDGPUGenInstrInfo.inc"
using namespace llvm;
AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
: AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
return RI;
}
bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const {
// TODO: Implement this function
return false;
}
unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
// TODO: Implement this function
return 0;
}
unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
// TODO: Implement this function
return 0;
}
bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
const MachineMemOperand *&MMO,
int &FrameIndex) const {
// TODO: Implement this function
return false;
}
unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
// TODO: Implement this function
return 0;
}
unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
// TODO: Implement this function
return 0;
}
bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
const MachineMemOperand *&MMO,
int &FrameIndex) const {
// TODO: Implement this function
return false;
}
MachineInstr *
AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const {
// TODO: Implement this function
return NULL;
}
bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
MachineBasicBlock &MBB) const {
while (iter != MBB.end()) {
switch (iter->getOpcode()) {
default:
break;
ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
case AMDGPU::BRANCH:
return true;
};
++iter;
}
return false;
}
MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator tmp = MBB->end();
if (!MBB->size()) {
return MBB->end();
}
while (--tmp) {
if (tmp->getOpcode() == AMDGPU::ENDLOOP
|| tmp->getOpcode() == AMDGPU::ENDIF
|| tmp->getOpcode() == AMDGPU::ELSE) {
if (tmp == MBB->begin()) {
return tmp;
} else {
continue;
}
} else {
return ++tmp;
}
}
return MBB->end();
}
void
AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill,
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
assert(!"Not Implemented");
}
void
AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
assert(!"Not Implemented");
}
MachineInstr *
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const {
// TODO: Implement this function
return 0;
}
MachineInstr*
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const {
// TODO: Implement this function
return 0;
}
bool
AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const
{
// TODO: Implement this function
return false;
}
bool
AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad,
bool UnfoldStore,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
// TODO: Implement this function
return false;
}
bool
AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
SmallVectorImpl<SDNode*> &NewNodes) const {
// TODO: Implement this function
return false;
}
unsigned
AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex) const {
// TODO: Implement this function
return 0;
}
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const {
assert(Offset2 > Offset1
&& "Second offset should be larger than first offset!");
// If we have fewer than 16 loads in a row and the offsets are within 16,
// then schedule them together.
// TODO: Make the loads schedule near if it fits in a cacheline
return (NumLoads < 16 && (Offset2 - Offset1) < 16);
}
bool
AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
const {
// TODO: Implement this function
return true;
}
void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
// TODO: Implement this function
}
bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
// TODO: Implement this function
return false;
}
bool
AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2)
const {
// TODO: Implement this function
return false;
}
bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
// TODO: Implement this function
return false;
}
bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
// TODO: Implement this function
return MI->getDesc().isPredicable();
}
bool
AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
// TODO: Implement this function
return true;
}
void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
DebugLoc DL) const
{
MachineRegisterInfo &MRI = MF.getRegInfo();
const AMDGPURegisterInfo & RI = getRegisterInfo();
for (unsigned i = 0; i < MI.getNumOperands(); i++) {
MachineOperand &MO = MI.getOperand(i);
// Convert dst regclass to one that is supported by the ISA
if (MO.isReg() && MO.isDef()) {
if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
assert(newRegClass);
MRI.setRegClass(MO.getReg(), newRegClass);
}
}
}
}


@@ -1,148 +0,0 @@
//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the definition of a TargetInstrInfo class that is common
// to all AMD GPUs.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPUINSTRUCTIONINFO_H_
#define AMDGPUINSTRUCTIONINFO_H_
#include "AMDGPURegisterInfo.h"
#include "AMDGPUInstrInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <map>
#define GET_INSTRINFO_HEADER
#define GET_INSTRINFO_ENUM
#include "AMDGPUGenInstrInfo.inc"
#define OPCODE_IS_ZERO_INT 0x00000042
#define OPCODE_IS_NOT_ZERO_INT 0x00000045
#define OPCODE_IS_ZERO 0x00000020
#define OPCODE_IS_NOT_ZERO 0x00000023
namespace llvm {
class AMDGPUTargetMachine;
class MachineFunction;
class MachineInstr;
class MachineInstrBuilder;
class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
private:
const AMDGPURegisterInfo RI;
TargetMachine &TM;
bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
MachineBasicBlock &MBB) const;
public:
explicit AMDGPUInstrInfo(TargetMachine &tm);
virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
unsigned &DstReg, unsigned &SubIdx) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const;
bool hasLoadFromStackSlot(const MachineInstr *MI,
const MachineMemOperand *&MMO,
int &FrameIndex) const;
unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const;
bool hasStoreFromStackSlot(const MachineInstr *MI,
const MachineMemOperand *&MMO,
int &FrameIndex) const;
MachineInstr *
convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const = 0;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
protected:
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const;
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const;
public:
bool canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const;
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr *> &NewMIs) const;
bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
SmallVectorImpl<SDNode *> &NewNodes) const;
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex = 0) const;
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
bool isPredicated(const MachineInstr *MI) const;
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2) const;
bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const;
bool isPredicable(MachineInstr *MI) const;
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
// Helper functions that check the opcode for status information
bool isLoadInst(llvm::MachineInstr *MI) const;
bool isExtLoadInst(llvm::MachineInstr *MI) const;
bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
bool isSExtLoadInst(llvm::MachineInstr *MI) const;
bool isZExtLoadInst(llvm::MachineInstr *MI) const;
bool isAExtLoadInst(llvm::MachineInstr *MI) const;
bool isStoreInst(llvm::MachineInstr *MI) const;
bool isTruncStoreInst(llvm::MachineInstr *MI) const;
virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
int64_t Imm) const = 0;
virtual unsigned getIEQOpcode() const = 0;
virtual bool isMov(unsigned opcode) const = 0;
/// convertToISA - Convert the AMDIL MachineInstr to a supported ISA
/// MachineInstr
virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
DebugLoc DL) const;
};
} // End llvm namespace
#endif // AMDGPUINSTRUCTIONINFO_H_


@@ -1,71 +0,0 @@
//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains DAG node definitions for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// AMDGPU DAG Profiles
//===----------------------------------------------------------------------===//
def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
]>;
//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//
// out = ((a << 32) | b) >> c)
//
// Can be used to optimize rtol:
// rotl(a, b) = bitalign(a, a, 32 - b)
def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
// out = a - floor(a)
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
// out = max(a, b) a and b are floats
def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]
>;
// out = max(a, b) a and b are signed ints
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
// out = max(a, b) a and b are unsigned ints
def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
// out = min(a, b) a and b are floats
def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]
>;
// out = min(a, b) a and b are signed ints
def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
// out = min(a, b) a and b are unsigned ints
def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
// urecip - This operation is a helper for integer division; it returns the
// result of 1 / a as a fractional unsigned integer.
// out = (2^32 / a) + e
// e is rounding error
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
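The BITALIGN comment above encodes a rotate identity that can be verified in isolation; this standalone check (C++20 for std::rotl, unrelated to the backend itself) exercises it:

#include <bit>
#include <cassert>
#include <cstdint>

// BITALIGN as described above: out = ((a << 32) | b) >> c, truncated to
// 32 bits. With a == b and c = 32 - s it is a left-rotate by s.
static uint32_t bitalign(uint32_t a, uint32_t b, uint32_t c) {
  return (uint32_t)((((uint64_t)a << 32) | b) >> c);
}

int main() {
  uint32_t x = 0xDEADBEEFu;
  for (int s = 1; s < 32; ++s)
    assert(bitalign(x, x, 32 - s) == std::rotl(x, s));
}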


@@ -1,183 +0,0 @@
//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//
class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
field bits<16> AMDILOp = 0;
field bits<3> Gen = 0;
let Namespace = "AMDGPU";
let OutOperandList = outs;
let InOperandList = ins;
let AsmString = asm;
let Pattern = pattern;
let Itinerary = NullALU;
let TSFlags{42-40} = Gen;
let TSFlags{63-48} = AMDILOp;
}
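The two TSFlags slices declared above fix a bit layout that the C++ side has to agree on; a minimal sketch of the matching decode (hypothetical helpers, invented for illustration):

#include <cassert>
#include <cstdint>

// TSFlags layout from AMDGPUInst: bits 40-42 hold Gen (3 bits), bits 48-63
// hold AMDILOp (16 bits).
static unsigned getGen(uint64_t TSFlags) { return (TSFlags >> 40) & 0x7; }
static unsigned getAMDILOp(uint64_t TSFlags) { return (TSFlags >> 48) & 0xFFFF; }

int main() {
  // Pack Gen = 5 and AMDILOp = 0x1234 the way TableGen lays them out.
  uint64_t Flags = ((uint64_t)5 << 40) | ((uint64_t)0x1234 << 48);
  assert(getGen(Flags) == 5 && getAMDILOp(Flags) == 0x1234);
}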
class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
: AMDGPUInst<outs, ins, asm, pattern> {
field bits<32> Inst = 0xffffffff;
}
def COND_EQ : PatLeaf <
(cond),
[{switch(N->get()){{default: return false;
case ISD::SETOEQ: case ISD::SETUEQ:
case ISD::SETEQ: return true;}}}]
>;
def COND_NE : PatLeaf <
(cond),
[{switch(N->get()){{default: return false;
case ISD::SETONE: case ISD::SETUNE:
case ISD::SETNE: return true;}}}]
>;
def COND_GT : PatLeaf <
(cond),
[{switch(N->get()){{default: return false;
case ISD::SETOGT: case ISD::SETUGT:
case ISD::SETGT: return true;}}}]
>;
def COND_GE : PatLeaf <
(cond),
[{switch(N->get()){{default: return false;
case ISD::SETOGE: case ISD::SETUGE:
case ISD::SETGE: return true;}}}]
>;
def COND_LT : PatLeaf <
(cond),
[{switch(N->get()){{default: return false;
case ISD::SETOLT: case ISD::SETULT:
case ISD::SETLT: return true;}}}]
>;
def COND_LE : PatLeaf <
(cond),
[{switch(N->get()){{default: return false;
case ISD::SETOLE: case ISD::SETULE:
case ISD::SETLE: return true;}}}]
>;
//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//
def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
}
def CONST : Constants;
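The Constants values are IEEE-754 single-precision bit patterns; a quick standalone check (not backend code) confirms which floats they encode:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Reinterpret the 32-bit patterns from the Constants class as floats.
static float bitsToFloat(uint32_t Bits) {
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

int main() {
  std::printf("TWO_PI     = %.7f\n", bitsToFloat(0x40c90fdb)); // ~6.2831855
  std::printf("PI         = %.7f\n", bitsToFloat(0x40490fdb)); // ~3.1415927
  std::printf("TWO_PI_INV = %.7f\n", bitsToFloat(0x3e22f983)); // ~0.1591549
}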
def FP_ZERO : PatLeaf <
(fpimm),
[{return N->getValueAPF().isZero();}]
>;
def FP_ONE : PatLeaf <
(fpimm),
[{return N->isExactlyValue(1.0);}]
>;
let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"CLAMP $dst, $src0",
[(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
>;
class FABS <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"FABS $dst, $src0",
[(set rc:$dst, (fabs rc:$src0))]
>;
class FNEG <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"FNEG $dst, $src0",
[(set rc:$dst, (fneg rc:$src0))]
>;
def SHADER_TYPE : AMDGPUShaderInst <
(outs),
(ins i32imm:$type),
"SHADER_TYPE $type",
[(int_AMDGPU_shader_type imm:$type)]
>;
} // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1
/* Generic helper patterns for intrinsics */
/* -------------------------------------- */
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
RegisterClass rc> : Pat <
(fpow rc:$src0, rc:$src1),
(exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
>;
/* Other helper patterns */
/* --------------------- */
/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type,
RegisterClass vec_class, int sub_idx,
SubRegIndex sub_reg>: Pat<
(sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
(EXTRACT_SUBREG vec_class:$src, sub_reg)
>;
/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
RegisterClass elem_class, RegisterClass vec_class,
int sub_idx, SubRegIndex sub_reg> : Pat <
(vec_type (vector_insert (vec_type vec_class:$vec),
(elem_type elem_class:$elem), sub_idx)),
(INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
>;
// Vector Build pattern
class Vector_Build <ValueType vecType, RegisterClass vectorClass,
ValueType elemType, RegisterClass elemClass> : Pat <
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
(elemType elemClass:$z), (elemType elemClass:$w))),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
(vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
elemClass:$z, sel_z), elemClass:$w, sel_w)
>;
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
(dt (bitconvert (st rc:$src0))),
(dt rc:$src0)
>;
include "R600Instructions.td"
include "SIInstrInfo.td"


@@ -1,63 +0,0 @@
//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines intrinsics that are used by all hw codegen targets.
//
//===----------------------------------------------------------------------===//
let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
def int_AMDGPU_kilp : Intrinsic<[], [], []>;
def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
}
let TargetPrefix = "TGSI", isTarget = 1 in {
def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>;
}
include "SIIntrinsics.td"


@@ -1,82 +0,0 @@
//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains code to lower AMDGPU MachineInstrs to their corresponding
// MCInst.
//
//===----------------------------------------------------------------------===//
//
#include "AMDGPUMCInstLower.h"
#include "AMDGPUAsmPrinter.h"
#include "R600InstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Constants.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
AMDGPUMCInstLower::AMDGPUMCInstLower() { }
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
switch (MO.getType()) {
default:
llvm_unreachable("unknown operand type");
case MachineOperand::MO_FPImmediate: {
const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
"Only floating point immediates are supported at the moment.");
MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
break;
}
case MachineOperand::MO_Immediate:
MCOp = MCOperand::CreateImm(MO.getImm());
break;
case MachineOperand::MO_Register:
MCOp = MCOperand::CreateReg(MO.getReg());
break;
}
OutMI.addOperand(MCOp);
}
}
void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
AMDGPUMCInstLower MCInstLowering;
// Ignore placeholder instructions:
if (MI->getOpcode() == AMDGPU::MASK_WRITE) {
return;
}
if (MI->isBundle()) {
const MachineBasicBlock *MBB = MI->getParent();
MachineBasicBlock::const_instr_iterator I = MI;
++I;
while (I != MBB->end() && I->isInsideBundle()) {
MCInst MCBundleInst;
const MachineInstr *BundledInst = I;
MCInstLowering.lower(BundledInst, MCBundleInst);
OutStreamer.EmitInstruction(MCBundleInst);
++I;
}
} else {
MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst);
OutStreamer.EmitInstruction(TmpInst);
}
}


@@ -1,30 +0,0 @@
//===- AMDGPUMCInstLower.h - MachineInstr Lowering Interface ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPU_MCINSTLOWER_H
#define AMDGPU_MCINSTLOWER_H
namespace llvm {
class MCInst;
class MachineInstr;
class AMDGPUMCInstLower {
public:
AMDGPUMCInstLower();
/// lower - Lower a MachineInstr to an MCInst
void lower(const MachineInstr *MI, MCInst &OutMI) const;
};
} // End namespace llvm
#endif //AMDGPU_MCINSTLOWER_H


@@ -1,50 +0,0 @@
//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Parent TargetRegisterInfo class common to all hw codegen targets.
//
//===----------------------------------------------------------------------===//
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
using namespace llvm;
AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
const TargetInstrInfo &tii)
: AMDGPUGenRegisterInfo(0),
TM(tm),
TII(tii)
{ }
//===----------------------------------------------------------------------===//
// Function handling callbacks - Functions are a seldom-used feature of GPUs, so
// they are not supported at this time.
//===----------------------------------------------------------------------===//
const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
const {
return &CalleeSavedReg;
}
void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj,
RegScavenger *RS) const {
assert(!"Subroutines not supported yet");
}
unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
assert(!"Subroutines not supported yet");
return 0;
}
#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"


@@ -1,62 +0,0 @@
//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the TargetRegisterInfo interface that is implemented
// by all hw codegen targets.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPUREGISTERINFO_H_
#define AMDGPUREGISTERINFO_H_
#include "llvm/ADT/BitVector.h"
#include "llvm/Target/TargetRegisterInfo.h"
#define GET_REGINFO_HEADER
#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
namespace llvm {
class AMDGPUTargetMachine;
class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo
{
TargetMachine &TM;
const TargetInstrInfo &TII;
static const uint16_t CalleeSavedReg;
AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
virtual BitVector getReservedRegs(const MachineFunction &MF) const {
assert(!"Unimplemented"); return BitVector();
}
/// getISARegClass - rc is an AMDIL reg class. This function returns the
/// ISA reg class that is equivalent to the given AMDIL reg class.
virtual const TargetRegisterClass * getISARegClass(
const TargetRegisterClass * rc) const {
assert(!"Unimplemented"); return NULL;
}
virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
assert(!"Unimplemented"); return NULL;
}
const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
RegScavenger *RS) const;
unsigned getFrameRegister(const MachineFunction &MF) const;
};
} // End namespace llvm
#endif // AMDGPUREGISTERINFO_H_


@@ -1,22 +0,0 @@
//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Tablegen register definitions common to all hw codegen targets.
//
//===----------------------------------------------------------------------===//
let Namespace = "AMDGPU" in {
def sel_x : SubRegIndex;
def sel_y : SubRegIndex;
def sel_z : SubRegIndex;
def sel_w : SubRegIndex;
}
include "R600RegisterInfo.td"
include "SIRegisterInfo.td"


@@ -1,94 +0,0 @@
//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
using namespace llvm;
#define GET_SUBTARGETINFO_ENUM
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
AMDGPUGenSubtargetInfo(TT, CPU, FS), mDumpCode(false) {
InstrItins = getInstrItineraryForCPU(CPU);
memset(CapsOverride, 0, sizeof(*CapsOverride)
* AMDGPUDeviceInfo::MaxNumberCapabilities);
// Default card
StringRef GPU = CPU;
mIs64bit = false;
mDefaultSize[0] = 64;
mDefaultSize[1] = 1;
mDefaultSize[2] = 1;
ParseSubtargetFeatures(GPU, FS);
mDevName = GPU;
mDevice = AMDGPUDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit);
}
AMDGPUSubtarget::~AMDGPUSubtarget()
{
delete mDevice;
}
bool
AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const
{
assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
"Caps index is out of bounds!");
return CapsOverride[caps];
}
bool
AMDGPUSubtarget::is64bit() const
{
return mIs64bit;
}
bool
AMDGPUSubtarget::isTargetELF() const
{
return false;
}
size_t
AMDGPUSubtarget::getDefaultSize(uint32_t dim) const
{
if (dim > 3) {
return 1;
} else {
return mDefaultSize[dim];
}
}
std::string
AMDGPUSubtarget::getDataLayout() const
{
if (!mDevice) {
return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
"-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
}
return mDevice->getDataLayout();
}
std::string
AMDGPUSubtarget::getDeviceName() const
{
return mDevName;
}
const AMDGPUDevice *
AMDGPUSubtarget::device() const
{
return mDevice;
}
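
As a usage sketch (the helper name is invented and llvm/Support/raw_ostream.h is assumed to be available), a pass might query the subtarget built above like this:

// Illustration only: dump the properties resolved by the constructor above.
static void dumpSubtarget(const AMDGPUSubtarget &ST) {
  llvm::errs() << "device: " << ST.getDeviceName()
               << ", 64bit: " << (ST.is64bit() ? "yes" : "no")
               << ", wavefront size: " << ST.device()->getWavefrontSize()
               << "\n";
}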

View file

@ -1,66 +0,0 @@
//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file declares the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
#ifndef _AMDGPUSUBTARGET_H_
#define _AMDGPUSUBTARGET_H_
#include "AMDILDevice.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#define GET_SUBTARGETINFO_HEADER
#include "AMDGPUGenSubtargetInfo.inc"
#define MAX_CB_SIZE (1 << 16)
namespace llvm {
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo
{
private:
bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
const AMDGPUDevice *mDevice;
size_t mDefaultSize[3];
size_t mMinimumSize[3];
std::string mDevName;
bool mIs64bit;
bool mIs32on64bit;
bool mDumpCode;
bool mR600ALUInst;
InstrItineraryData InstrItins;
public:
AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
virtual ~AMDGPUSubtarget();
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS);
bool isOverride(AMDGPUDeviceInfo::Caps) const;
bool is64bit() const;
// Helper functions to simplify if statements
bool isTargetELF() const;
const AMDGPUDevice* device() const;
std::string getDataLayout() const;
std::string getDeviceName() const;
virtual size_t getDefaultSize(uint32_t dim) const;
bool dumpCode() const { return mDumpCode; }
bool r600ALUEncoding() const { return mR600ALUInst; }
};
} // End namespace llvm
#endif // _AMDGPUSUBTARGET_H_

View file

@ -1,143 +0,0 @@
//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The AMDGPU target machine contains all of the hardware specific information
// needed to emit code for R600 and SI GPUs.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/PassManager.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include <llvm/CodeGen/Passes.h>
using namespace llvm;
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
}
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
TargetOptions Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OptLevel
)
:
LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
Subtarget(TT, CPU, FS),
DataLayout(Subtarget.getDataLayout()),
FrameLowering(TargetFrameLowering::StackGrowsUp,
Subtarget.device()->getStackAlignment(), 0),
IntrinsicInfo(this),
InstrItins(&Subtarget.getInstrItineraryData()),
mDump(false)
{
// TLInfo uses InstrInfo so it must be initialized after.
if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
InstrInfo = new R600InstrInfo(*this);
TLInfo = new R600TargetLowering(*this);
} else {
InstrInfo = new SIInstrInfo(*this);
TLInfo = new SITargetLowering(*this);
}
}
AMDGPUTargetMachine::~AMDGPUTargetMachine()
{
}
namespace {
class AMDGPUPassConfig : public TargetPassConfig {
public:
AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
return getTM<AMDGPUTargetMachine>();
}
virtual bool addPreISel();
virtual bool addInstSelector();
virtual bool addPreRegAlloc();
virtual bool addPostRegAlloc();
virtual bool addPreSched2();
virtual bool addPreEmitPass();
};
} // End of anonymous namespace
TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
return new AMDGPUPassConfig(this, PM);
}
bool
AMDGPUPassConfig::addPreISel()
{
return false;
}
bool AMDGPUPassConfig::addInstSelector() {
PM->add(createAMDGPUPeepholeOpt(*TM));
PM->add(createAMDGPUISelDag(getAMDGPUTargetMachine()));
return false;
}
bool AMDGPUPassConfig::addPreRegAlloc() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
PM->add(createSIAssignInterpRegsPass(*TM));
}
PM->add(createAMDGPUConvertToISAPass(*TM));
return false;
}
bool AMDGPUPassConfig::addPostRegAlloc() {
return false;
}
bool AMDGPUPassConfig::addPreSched2() {
addPass(IfConverterID);
return false;
}
bool AMDGPUPassConfig::addPreEmitPass() {
PM->add(createAMDGPUCFGPreparationPass(*TM));
PM->add(createAMDGPUCFGStructurizerPass(*TM));
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
PM->add(createR600ExpandSpecialInstrsPass(*TM));
addPass(FinalizeMachineBundlesID);
} else {
PM->add(createSILowerLiteralConstantsPass(*TM));
// piglit is unreliable (VM protection faults, GPU lockups) with this pass:
//PM->add(createSILowerFlowControlPass(*TM));
}
return false;
}
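
For context, a client normally reaches this TargetMachine through the target registry. The sketch below uses the LLVM 3.1-era registry API; the triple and CPU strings are assumptions, not taken from this tree.

// Illustration only: look up the registered target and build a machine.
static TargetMachine *buildAMDGPUTargetMachine() {
  LLVMInitializeAMDGPUTarget();
  std::string Error;
  const Target *T = TargetRegistry::lookupTarget("r600", Error);
  if (!T)
    return 0;
  TargetOptions Options;
  return T->createTargetMachine("r600", "redwood", "", Options,
                                Reloc::Default, CodeModel::Default,
                                CodeGenOpt::Default);
}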

View file

@ -1,70 +0,0 @@
//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The AMDGPU TargetMachine interface definition for hw codegen targets.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPU_TARGET_MACHINE_H
#define AMDGPU_TARGET_MACHINE_H
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILFrameLowering.h"
#include "AMDILIntrinsicInfo.h"
#include "R600ISelLowering.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Target/TargetData.h"
namespace llvm {
MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
class AMDGPUTargetMachine : public LLVMTargetMachine {
AMDGPUSubtarget Subtarget;
const TargetData DataLayout;
AMDGPUFrameLowering FrameLowering;
AMDGPUIntrinsicInfo IntrinsicInfo;
const AMDGPUInstrInfo * InstrInfo;
AMDGPUTargetLowering * TLInfo;
const InstrItineraryData* InstrItins;
bool mDump;
public:
AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS,
TargetOptions Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
~AMDGPUTargetMachine();
virtual const AMDGPUFrameLowering* getFrameLowering() const {
return &FrameLowering;
}
virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
return &IntrinsicInfo;
}
virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
virtual const AMDGPURegisterInfo *getRegisterInfo() const {
return &InstrInfo->getRegisterInfo();
}
virtual AMDGPUTargetLowering * getTargetLowering() const {
return TLInfo;
}
virtual const InstrItineraryData* getInstrItineraryData() const {
return InstrItins;
}
virtual const TargetData* getTargetData() const { return &DataLayout; }
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
};
} // End namespace llvm
#endif // AMDGPU_TARGET_MACHINE_H

View file

@ -1,106 +0,0 @@
//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file contains the entry points for global functions defined in the LLVM
// AMDGPU back-end.
//
//===----------------------------------------------------------------------===//
#ifndef AMDIL_H_
#define AMDIL_H_
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetMachine.h"
#define ARENA_SEGMENT_RESERVED_UAVS 12
#define DEFAULT_ARENA_UAV_ID 8
#define DEFAULT_RAW_UAV_ID 7
#define GLOBAL_RETURN_RAW_UAV_ID 11
#define HW_MAX_NUM_CB 8
#define MAX_NUM_UNIQUE_UAVS 8
#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
#define OPENCL_MAX_READ_IMAGES 128
#define OPENCL_MAX_WRITE_IMAGES 8
#define OPENCL_MAX_SAMPLERS 16
// The next three values can never be zero, as zero is the ID that is
// used to assert against.
#define DEFAULT_LDS_ID 1
#define DEFAULT_GDS_ID 1
#define DEFAULT_SCRATCH_ID 1
#define DEFAULT_VEC_SLOTS 8
#define OCL_DEVICE_RV710 0x0001
#define OCL_DEVICE_RV730 0x0002
#define OCL_DEVICE_RV770 0x0004
#define OCL_DEVICE_CEDAR 0x0008
#define OCL_DEVICE_REDWOOD 0x0010
#define OCL_DEVICE_JUNIPER 0x0020
#define OCL_DEVICE_CYPRESS 0x0040
#define OCL_DEVICE_CAICOS 0x0080
#define OCL_DEVICE_TURKS 0x0100
#define OCL_DEVICE_BARTS 0x0200
#define OCL_DEVICE_CAYMAN 0x0400
#define OCL_DEVICE_ALL 0x3FFF
/// The number of function IDs that are reserved for
/// internal compiler usage.
const unsigned int RESERVED_FUNCS = 1024;
namespace llvm {
class AMDGPUInstrPrinter;
class FunctionPass;
class MCAsmInfo;
class raw_ostream;
class Target;
class TargetMachine;
/// Instruction selection passes.
FunctionPass*
createAMDGPUISelDag(TargetMachine &TM);
FunctionPass*
createAMDGPUPeepholeOpt(TargetMachine &TM);
/// Pre emit passes.
FunctionPass*
createAMDGPUCFGPreparationPass(TargetMachine &TM);
FunctionPass*
createAMDGPUCFGStructurizerPass(TargetMachine &TM);
extern Target TheAMDGPUTarget;
} // end namespace llvm;
/// Include device information enumerations
#include "AMDILDeviceInfo.h"
namespace llvm {
/// OpenCL uses address spaces to differentiate between
/// the various memory regions on the hardware. On the CPU
/// all of the address spaces point to the same memory;
/// on the GPU, however, each address space points to
/// a separate piece of memory that is distinct from other
/// memory locations.
namespace AMDGPUAS {
enum AddressSpaces {
PRIVATE_ADDRESS = 0, // Address space for private memory.
GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0).
CONSTANT_ADDRESS = 2, // Address space for constant memory.
LOCAL_ADDRESS = 3, // Address space for local memory.
REGION_ADDRESS = 4, // Address space for region memory.
ADDRESS_NONE = 5, // Address space for unknown memory.
PARAM_D_ADDRESS = 6, // Address space for direct addressable parameter memory (CONST0)
PARAM_I_ADDRESS = 7, // Address space for indirect addressable parameter memory (VTX1)
USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI
LAST_ADDRESS = 9
};
} // namespace AMDGPUAS
} // end namespace llvm
#endif // AMDIL_H_
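
A small sketch of how the AMDGPUAS enum above is typically consulted; the helper name is invented and the usual llvm/DerivedTypes.h include is assumed.

// Illustration only: classify a pointer value by its address space.
static bool isGlobalPointer(const llvm::Value *V) {
  const llvm::PointerType *PT =
      llvm::dyn_cast<llvm::PointerType>(V->getType());
  return PT && PT->getAddressSpace() == llvm::AMDGPUAS::GLOBAL_ADDRESS;
}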

View file

@ -1,129 +0,0 @@
//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
#include "AMDIL7XXDevice.h"
#include "AMDGPUSubtarget.h"
#include "AMDILDevice.h"
using namespace llvm;
AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST)
{
setCaps();
std::string name = mSTM->getDeviceName();
if (name == "rv710") {
mDeviceFlag = OCL_DEVICE_RV710;
} else if (name == "rv730") {
mDeviceFlag = OCL_DEVICE_RV730;
} else {
mDeviceFlag = OCL_DEVICE_RV770;
}
}
AMDGPU7XXDevice::~AMDGPU7XXDevice()
{
}
void AMDGPU7XXDevice::setCaps()
{
mSWBits.set(AMDGPUDeviceInfo::LocalMem);
}
size_t AMDGPU7XXDevice::getMaxLDSSize() const
{
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
return MAX_LDS_SIZE_700;
}
return 0;
}
size_t AMDGPU7XXDevice::getWavefrontSize() const
{
return AMDGPUDevice::HalfWavefrontSize;
}
uint32_t AMDGPU7XXDevice::getGeneration() const
{
return AMDGPUDeviceInfo::HD4XXX;
}
uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const
{
switch (DeviceID) {
default:
assert(0 && "ID type passed in is unknown!");
break;
case GLOBAL_ID:
case CONSTANT_ID:
case RAW_UAV_ID:
case ARENA_UAV_ID:
break;
case LDS_ID:
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
return DEFAULT_LDS_ID;
}
break;
case SCRATCH_ID:
if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
return DEFAULT_SCRATCH_ID;
}
break;
case GDS_ID:
assert(0 && "GDS UAV ID is not supported on this chip");
if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
return DEFAULT_GDS_ID;
}
break;
};
return 0;
}
uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const
{
return 1;
}
AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST)
{
setCaps();
}
AMDGPU770Device::~AMDGPU770Device()
{
}
void AMDGPU770Device::setCaps()
{
if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
mSWBits.set(AMDGPUDeviceInfo::FMA);
mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
}
mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
mHWBits.reset(AMDGPUDeviceInfo::LongOps);
mSWBits.set(AMDGPUDeviceInfo::LongOps);
mSWBits.set(AMDGPUDeviceInfo::LocalMem);
}
size_t AMDGPU770Device::getWavefrontSize() const
{
return AMDGPUDevice::WavefrontSize;
}
AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST)
{
}
AMDGPU710Device::~AMDGPU710Device()
{
}
size_t AMDGPU710Device::getWavefrontSize() const
{
return AMDGPUDevice::QuarterWavefrontSize;
}
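
The three classes above differ chiefly in wavefront width (rv710: 16, generic 7XX: 32, rv770: 64). A sketch, with an invented helper name, of arithmetic a client might build on that:

// Illustration only: wavefronts needed to cover a work-item count.
static size_t numWavefronts(const AMDGPUDevice &Dev, size_t WorkItems) {
  size_t W = Dev.getWavefrontSize(); // 16, 32 or 64 depending on the class
  return (WorkItems + W - 1) / W;    // round up to whole wavefronts
}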

View file

@ -1,70 +0,0 @@
//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// Interface for the subtarget data classes.
//
//===----------------------------------------------------------------------===//
// This file will define the interface that each generation needs to
// implement in order to correctly answer queries on the capabilities of the
// specific hardware.
//===----------------------------------------------------------------------===//
#ifndef _AMDIL7XXDEVICEIMPL_H_
#define _AMDIL7XXDEVICEIMPL_H_
#include "AMDILDevice.h"
namespace llvm {
class AMDGPUSubtarget;
//===----------------------------------------------------------------------===//
// 7XX generation of devices and their respective sub classes
//===----------------------------------------------------------------------===//
// The AMDGPU7XXDevice class represents the generic 7XX device. All 7XX
// devices are derived from this class. The AMDGPU7XX device will only
// support the minimal features that are required to be considered OpenCL 1.0
// compliant and nothing more.
class AMDGPU7XXDevice : public AMDGPUDevice {
public:
AMDGPU7XXDevice(AMDGPUSubtarget *ST);
virtual ~AMDGPU7XXDevice();
virtual size_t getMaxLDSSize() const;
virtual size_t getWavefrontSize() const;
virtual uint32_t getGeneration() const;
virtual uint32_t getResourceID(uint32_t DeviceID) const;
virtual uint32_t getMaxNumUAVs() const;
protected:
virtual void setCaps();
}; // AMDGPU7XXDevice
// The AMDGPU770Device class represents the RV770 chip and its
// derivative cards. The difference between this device and the base
// class is that this device adds support for double precision
// and has a larger wavefront size.
class AMDGPU770Device : public AMDGPU7XXDevice {
public:
AMDGPU770Device(AMDGPUSubtarget *ST);
virtual ~AMDGPU770Device();
virtual size_t getWavefrontSize() const;
private:
virtual void setCaps();
}; // AMDGPU770Device
// The AMDGPU710Device class derives from the 7XX base class, but this
// class is a smaller derivative, so we need to overload some of the
// functions in order to correctly specify this information.
class AMDGPU710Device : public AMDGPU7XXDevice {
public:
AMDGPU710Device(AMDGPUSubtarget *ST);
virtual ~AMDGPU710Device();
virtual size_t getWavefrontSize() const;
}; // AMDGPU710Device
} // namespace llvm
#endif // _AMDIL7XXDEVICEIMPL_H_

View file

@ -1,85 +0,0 @@
//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Target-independent interfaces which we are implementing
//===----------------------------------------------------------------------===//
include "llvm/Target/Target.td"
// Dummy Instruction itineraries for pseudo instructions
def ALU_NULL : FuncUnit;
def NullALU : InstrItinClass;
//===----------------------------------------------------------------------===//
// AMDIL Subtarget features.
//===----------------------------------------------------------------------===//
def FeatureFP64 : SubtargetFeature<"fp64",
"CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
"true",
"Enable 64bit double precision operations">;
def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
"CapsOverride[AMDGPUDeviceInfo::ByteStores]",
"true",
"Enable byte addressable stores">;
def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
"CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
"true",
"Enable duplicate barrier detection(HD5XXX or later).">;
def FeatureImages : SubtargetFeature<"images",
"CapsOverride[AMDGPUDeviceInfo::Images]",
"true",
"Enable image functions">;
def FeatureMultiUAV : SubtargetFeature<"multi_uav",
"CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
"true",
"Generate multiple UAV code(HD5XXX family or later)">;
def FeatureMacroDB : SubtargetFeature<"macrodb",
"CapsOverride[AMDGPUDeviceInfo::MacroDB]",
"true",
"Use internal macrodb, instead of macrodb in driver">;
def FeatureNoAlias : SubtargetFeature<"noalias",
"CapsOverride[AMDGPUDeviceInfo::NoAlias]",
"true",
"assert that all kernel argument pointers are not aliased">;
def FeatureNoInline : SubtargetFeature<"no-inline",
"CapsOverride[AMDGPUDeviceInfo::NoInline]",
"true",
"specify whether to not inline functions">;
def Feature64BitPtr : SubtargetFeature<"64BitPtr",
"mIs64bit",
"false",
"Specify if 64bit addressing should be used.">;
def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
"mIs32on64bit",
"false",
"Specify if 64bit sized pointers with 32bit addressing should be used.">;
def FeatureDebug : SubtargetFeature<"debug",
"CapsOverride[AMDGPUDeviceInfo::Debug]",
"true",
"Debug mode is enabled, so disable hardware accelerated address spaces.">;
def FeatureDumpCode : SubtargetFeature <"DumpCode",
"mDumpCode",
"true",
"Dump MachineInstrs in the CodeEmitter">;
def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
"mR600ALUInst",
"false",
"Older version of ALU instructions encoding.">;
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
include "AMDILRegisterInfo.td"
include "AMDILInstrInfo.td"

File diff suppressed because it is too large

View file

@ -1,137 +0,0 @@
//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
#include "AMDILDevice.h"
#include "AMDGPUSubtarget.h"
using namespace llvm;
// Default implementation for all of the classes.
AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST)
{
mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
setCaps();
mDeviceFlag = OCL_DEVICE_ALL;
}
AMDGPUDevice::~AMDGPUDevice()
{
mHWBits.clear();
mSWBits.clear();
}
size_t AMDGPUDevice::getMaxGDSSize() const
{
return 0;
}
uint32_t
AMDGPUDevice::getDeviceFlag() const
{
return mDeviceFlag;
}
size_t AMDGPUDevice::getMaxNumCBs() const
{
if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
return HW_MAX_NUM_CB;
}
return 0;
}
size_t AMDGPUDevice::getMaxCBSize() const
{
if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
return MAX_CB_SIZE;
}
return 0;
}
size_t AMDGPUDevice::getMaxScratchSize() const
{
return 65536;
}
uint32_t AMDGPUDevice::getStackAlignment() const
{
return 16;
}
void AMDGPUDevice::setCaps()
{
mSWBits.set(AMDGPUDeviceInfo::HalfOps);
mSWBits.set(AMDGPUDeviceInfo::ByteOps);
mSWBits.set(AMDGPUDeviceInfo::ShortOps);
mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) {
mSWBits.set(AMDGPUDeviceInfo::NoInline);
}
if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) {
mSWBits.set(AMDGPUDeviceInfo::MacroDB);
}
if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
mSWBits.set(AMDGPUDeviceInfo::ConstantMem);
} else {
mHWBits.set(AMDGPUDeviceInfo::ConstantMem);
}
if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
mSWBits.set(AMDGPUDeviceInfo::PrivateMem);
} else {
mHWBits.set(AMDGPUDeviceInfo::PrivateMem);
}
if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) {
mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
}
mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
mSWBits.set(AMDGPUDeviceInfo::LongOps);
}
AMDGPUDeviceInfo::ExecutionMode
AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const
{
if (mHWBits[Caps]) {
assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
return AMDGPUDeviceInfo::Hardware;
}
if (mSWBits[Caps]) {
assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
return AMDGPUDeviceInfo::Software;
}
return AMDGPUDeviceInfo::Unsupported;
}
bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const
{
return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported;
}
bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const
{
return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware;
}
bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const
{
return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software;
}
std::string
AMDGPUDevice::getDataLayout() const
{
return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
"-v512:512:512-v1024:1024:1024-v2048:2048:2048"
"-n8:16:32:64");
}
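
The string returned above is in LLVM's data-layout grammar. A sketch of feeding it to the era's TargetData class (later renamed DataLayout); the function name is invented.

// Illustration only: parse the device layout string and read it back.
static void inspectLayout(const AMDGPUDevice *Dev) {
  TargetData TD(Dev->getDataLayout());
  unsigned PtrSize = TD.getPointerSize(); // 4 bytes, from "p:32:32:32"
  bool IsLE = TD.isLittleEndian();        // true, from the leading "e"
  (void)PtrSize; (void)IsLE;
}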

View file

@ -1,115 +0,0 @@
//===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// Interface for the subtarget data classes.
//
//===----------------------------------------------------------------------===//
// This file will define the interface that each generation needs to
// implement in order to correctly answer queries on the capabilities of the
// specific hardware.
//===----------------------------------------------------------------------===//
#ifndef _AMDILDEVICEIMPL_H_
#define _AMDILDEVICEIMPL_H_
#include "AMDIL.h"
#include "llvm/ADT/BitVector.h"
namespace llvm {
class AMDGPUSubtarget;
class MCStreamer;
//===----------------------------------------------------------------------===//
// Interface for data that is specific to a single device
//===----------------------------------------------------------------------===//
class AMDGPUDevice {
public:
AMDGPUDevice(AMDGPUSubtarget *ST);
virtual ~AMDGPUDevice();
// Enum values for the various memory types.
enum {
RAW_UAV_ID = 0,
ARENA_UAV_ID = 1,
LDS_ID = 2,
GDS_ID = 3,
SCRATCH_ID = 4,
CONSTANT_ID = 5,
GLOBAL_ID = 6,
MAX_IDS = 7
} IO_TYPE_IDS;
// Returns the max LDS size that the hardware supports. Size is in
// bytes.
virtual size_t getMaxLDSSize() const = 0;
// Returns the max GDS size that the hardware supports if the GDS is
// supported by the hardware. Size is in bytes.
virtual size_t getMaxGDSSize() const;
// Returns the max number of hardware constant address spaces that
// are supported by this device.
virtual size_t getMaxNumCBs() const;
// Returns the max number of bytes a single hardware constant buffer
// can support. Size is in bytes.
virtual size_t getMaxCBSize() const;
// Returns the max number of bytes allowed by the hardware scratch
// buffer. Size is in bytes.
virtual size_t getMaxScratchSize() const;
// Get the flag that corresponds to the device.
virtual uint32_t getDeviceFlag() const;
// Returns the number of work-items that exist in a single hardware
// wavefront.
virtual size_t getWavefrontSize() const = 0;
// Get the generational name of this specific device.
virtual uint32_t getGeneration() const = 0;
// Get the stack alignment of this specific device.
virtual uint32_t getStackAlignment() const;
// Get the resource ID for this specific device.
virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
// Get the max number of UAV's for this device.
virtual uint32_t getMaxNumUAVs() const = 0;
// API exposing the more detailed capabilities of each family of
// cards. If a capability is supported, then exactly one of
// usesHardware or usesSoftware returns true for it; they are never
// both true for the same capability. Hardware execution means the
// feature is implemented natively by the hardware and is not
// emulated. Software execution means the feature is emulated in
// software, possibly with hardware assistance, because the hardware
// does not fully comply with the OpenCL specs.
bool isSupported(AMDGPUDeviceInfo::Caps Mode) const;
bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const;
bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const;
virtual std::string getDataLayout() const;
static const unsigned int MAX_LDS_SIZE_700 = 16384;
static const unsigned int MAX_LDS_SIZE_800 = 32768;
static const unsigned int WavefrontSize = 64;
static const unsigned int HalfWavefrontSize = 32;
static const unsigned int QuarterWavefrontSize = 16;
protected:
virtual void setCaps();
llvm::BitVector mHWBits;
llvm::BitVector mSWBits;
AMDGPUSubtarget *mSTM;
uint32_t mDeviceFlag;
private:
AMDGPUDeviceInfo::ExecutionMode
getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const;
}; // AMDGPUDevice
} // namespace llvm
#endif // _AMDILDEVICEIMPL_H_

View file

@ -1,94 +0,0 @@
//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// Function that creates DeviceInfo from a device name and other information.
//
//==-----------------------------------------------------------------------===//
#include "AMDILDevices.h"
#include "AMDGPUSubtarget.h"
using namespace llvm;
namespace llvm {
namespace AMDGPUDeviceInfo {
AMDGPUDevice*
getDeviceFromName(const std::string &deviceName, AMDGPUSubtarget *ptr,
bool is64bit, bool is64on32bit)
{
if (deviceName.c_str()[2] == '7') {
switch (deviceName.c_str()[3]) {
case '1':
return new AMDGPU710Device(ptr);
case '7':
return new AMDGPU770Device(ptr);
default:
return new AMDGPU7XXDevice(ptr);
};
} else if (deviceName == "cypress") {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPUCypressDevice(ptr);
} else if (deviceName == "juniper") {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPUEvergreenDevice(ptr);
} else if (deviceName == "redwood") {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPURedwoodDevice(ptr);
} else if (deviceName == "cedar") {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPUCedarDevice(ptr);
} else if (deviceName == "barts"
|| deviceName == "turks") {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPUNIDevice(ptr);
} else if (deviceName == "cayman") {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPUCaymanDevice(ptr);
} else if (deviceName == "caicos") {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPUNIDevice(ptr);
} else if (deviceName == "SI") {
return new AMDGPUSIDevice(ptr);
} else {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPU7XXDevice(ptr);
}
}
} // End namespace AMDGPUDeviceInfo
} // End namespace llvm
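
A usage sketch of the factory above; the surrounding function is invented, and ownership of the returned device sits with the caller (the AMDGPUSubtarget destructor deletes it).

// Illustration only: resolve a device name and query it.
static size_t redwoodWavefrontSize(AMDGPUSubtarget *ST) {
  AMDGPUDevice *Dev =
      AMDGPUDeviceInfo::getDeviceFromName("redwood", ST, /*is64bit=*/false);
  size_t Wave = Dev->getWavefrontSize(); // 32: redwood is a half wavefront
  delete Dev; // normally the owning AMDGPUSubtarget does this
  return Wave;
}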

View file

@ -1,90 +0,0 @@
//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
#ifndef _AMDILDEVICEINFO_H_
#define _AMDILDEVICEINFO_H_
#include <string>
namespace llvm
{
class AMDGPUDevice;
class AMDGPUSubtarget;
namespace AMDGPUDeviceInfo
{
// Each Capabilities can be executed using a hardware instruction,
// emulated with a sequence of software instructions, or not
// supported at all.
enum ExecutionMode {
Unsupported = 0, // Unsupported feature on the card(Default value)
Software, // This is the execution mode that is set if the
// feature is emulated in software
Hardware // This execution mode is set if the feature exists
// natively in hardware
};
// Any changes to this need to have a corresponding update to the
// twiki page GPUMetadataABI
enum Caps {
HalfOps = 0x1, // Half float is supported or not.
DoubleOps = 0x2, // Double is supported or not.
ByteOps = 0x3, // Byte(char) is supported or not.
ShortOps = 0x4, // Short is supported or not.
LongOps = 0x5, // Long is supported or not.
Images = 0x6, // Images are supported or not.
ByteStores = 0x7, // ByteStores available(!HD4XXX).
ConstantMem = 0x8, // Constant/CB memory.
LocalMem = 0x9, // Local/LDS memory.
PrivateMem = 0xA, // Scratch/Private/Stack memory.
RegionMem = 0xB, // OCL GDS Memory Extension.
FMA = 0xC, // Use HW FMA or SW FMA.
ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023.
MultiUAV = 0xE, // Use for UAV per Pointer 0-7.
Reserved0 = 0xF, // ReservedFlag
NoAlias = 0x10, // Cached loads.
Signed24BitOps = 0x11, // Peephole Optimization.
// Debug mode implies that no hardware features or optimizations
// are performed and that all memory accesses go through a single
// uav (Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
Debug = 0x12, // Debug mode is enabled.
CachedMem = 0x13, // Cached mem is available or not.
BarrierDetect = 0x14, // Detect duplicate barriers.
Reserved1 = 0x15, // Reserved flag
ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available.
ArenaVectors = 0x17, // Flag to specify if vector loads from arena work.
TmrReg = 0x18, // Flag to specify if Tmr register is supported.
NoInline = 0x19, // Flag to specify that no inlining should occur.
MacroDB = 0x1A, // Flag to specify that backend handles macrodb.
HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod.
ArenaUAV = 0x1C, // Flag to specify that arena uav is supported.
PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's.
// If more capabilities are required, then
// this number needs to be increased.
// All capabilities must come before this
// number.
MaxNumberCapabilities = 0x20
};
// These have to be in order with the older generations
// having the lower number enumerations.
enum Generation {
HD4XXX = 0, // 7XX based devices.
HD5XXX, // Evergreen based devices.
HD6XXX, // NI/Evergreen+ based devices.
HD7XXX,
HDTEST, // Experimental feature testing device.
HDNUMGEN
};
AMDGPUDevice*
getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr,
bool is64bit = false, bool is64on32bit = false);
} // namespace AMDGPUDeviceInfo
} // namespace llvm
#endif // _AMDILDEVICEINFO_H_
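
The tri-state above is mutually exclusive by construction (getExecutionMode() in AMDILDevice.cpp asserts this). A sketch of the canonical way to branch on it; the function name is invented.

// Illustration only: dispatch on a capability's execution mode.
static void lowerLocalMem(const AMDGPUDevice *Dev) {
  if (Dev->usesHardware(AMDGPUDeviceInfo::LocalMem)) {
    // emit native LDS operations
  } else if (Dev->usesSoftware(AMDGPUDeviceInfo::LocalMem)) {
    // take the emulated path
  } else {
    // AMDGPUDeviceInfo::Unsupported: reject or fall back
  }
}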

View file

@ -1,19 +0,0 @@
//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
#ifndef __AMDIL_DEVICES_H_
#define __AMDIL_DEVICES_H_
// Include all of the device specific header files
// This file is for Internal use only!
#include "AMDIL7XXDevice.h"
#include "AMDILDevice.h"
#include "AMDILEvergreenDevice.h"
#include "AMDILNIDevice.h"
#include "AMDILSIDevice.h"
#endif // __AMDIL_DEVICES_H_

View file

@ -1,169 +0,0 @@
//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
#include "AMDILEvergreenDevice.h"
using namespace llvm;
AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST)
: AMDGPUDevice(ST) {
setCaps();
std::string name = ST->getDeviceName();
if (name == "cedar") {
mDeviceFlag = OCL_DEVICE_CEDAR;
} else if (name == "redwood") {
mDeviceFlag = OCL_DEVICE_REDWOOD;
} else if (name == "cypress") {
mDeviceFlag = OCL_DEVICE_CYPRESS;
} else {
mDeviceFlag = OCL_DEVICE_JUNIPER;
}
}
AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() {
}
size_t AMDGPUEvergreenDevice::getMaxLDSSize() const {
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
return MAX_LDS_SIZE_800;
} else {
return 0;
}
}
size_t AMDGPUEvergreenDevice::getMaxGDSSize() const {
if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
return MAX_LDS_SIZE_800;
} else {
return 0;
}
}
uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const {
return 12;
}
uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const {
switch(id) {
default:
assert(0 && "ID type passed in is unknown!");
break;
case CONSTANT_ID:
case RAW_UAV_ID:
return GLOBAL_RETURN_RAW_UAV_ID;
case GLOBAL_ID:
case ARENA_UAV_ID:
return DEFAULT_ARENA_UAV_ID;
case LDS_ID:
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
return DEFAULT_LDS_ID;
} else {
return DEFAULT_ARENA_UAV_ID;
}
case GDS_ID:
if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
return DEFAULT_GDS_ID;
} else {
return DEFAULT_ARENA_UAV_ID;
}
case SCRATCH_ID:
if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
return DEFAULT_SCRATCH_ID;
} else {
return DEFAULT_ARENA_UAV_ID;
}
};
return 0;
}
size_t AMDGPUEvergreenDevice::getWavefrontSize() const {
return AMDGPUDevice::WavefrontSize;
}
uint32_t AMDGPUEvergreenDevice::getGeneration() const {
return AMDGPUDeviceInfo::HD5XXX;
}
void AMDGPUEvergreenDevice::setCaps() {
mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
mHWBits.set(AMDGPUDeviceInfo::ArenaUAV);
mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod);
mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) {
mHWBits.set(AMDGPUDeviceInfo::ByteStores);
}
if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
mSWBits.set(AMDGPUDeviceInfo::LocalMem);
mSWBits.set(AMDGPUDeviceInfo::RegionMem);
} else {
mHWBits.set(AMDGPUDeviceInfo::LocalMem);
mHWBits.set(AMDGPUDeviceInfo::RegionMem);
}
mHWBits.set(AMDGPUDeviceInfo::Images);
if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) {
mHWBits.set(AMDGPUDeviceInfo::NoAlias);
}
mHWBits.set(AMDGPUDeviceInfo::CachedMem);
if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) {
mHWBits.set(AMDGPUDeviceInfo::MultiUAV);
}
mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps);
mHWBits.set(AMDGPUDeviceInfo::ArenaVectors);
mHWBits.set(AMDGPUDeviceInfo::LongOps);
mSWBits.reset(AMDGPUDeviceInfo::LongOps);
mHWBits.set(AMDGPUDeviceInfo::TmrReg);
}
AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST)
: AMDGPUEvergreenDevice(ST) {
setCaps();
}
AMDGPUCypressDevice::~AMDGPUCypressDevice() {
}
void AMDGPUCypressDevice::setCaps() {
if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
mHWBits.set(AMDGPUDeviceInfo::FMA);
}
}
AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST)
: AMDGPUEvergreenDevice(ST) {
setCaps();
}
AMDGPUCedarDevice::~AMDGPUCedarDevice() {
}
void AMDGPUCedarDevice::setCaps() {
mSWBits.set(AMDGPUDeviceInfo::FMA);
}
size_t AMDGPUCedarDevice::getWavefrontSize() const {
return AMDGPUDevice::QuarterWavefrontSize;
}
AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST)
: AMDGPUEvergreenDevice(ST) {
setCaps();
}
AMDGPURedwoodDevice::~AMDGPURedwoodDevice()
{
}
void AMDGPURedwoodDevice::setCaps() {
mSWBits.set(AMDGPUDeviceInfo::FMA);
}
size_t AMDGPURedwoodDevice::getWavefrontSize() const {
return AMDGPUDevice::HalfWavefrontSize;
}
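
On Evergreen every resource type that is not backed by hardware collapses onto the arena UAV, as getResourceID() above shows for LDS, GDS and scratch. The fallback rule, condensed into an invented helper:

// Illustration only: the hardware-or-arena fallback of getResourceID().
static uint32_t ldsResourceID(const AMDGPUDevice &Dev) {
  return Dev.usesHardware(AMDGPUDeviceInfo::LocalMem)
             ? DEFAULT_LDS_ID         // native LDS
             : DEFAULT_ARENA_UAV_ID;  // emulated through the arena UAV
}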

View file

@ -1,87 +0,0 @@
//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// Interface for the subtarget data classes.
//
//===----------------------------------------------------------------------===//
// This file will define the interface that each generation needs to
// implement in order to correctly answer queries on the capabilities of the
// specific hardware.
//===----------------------------------------------------------------------===//
#ifndef _AMDILEVERGREENDEVICE_H_
#define _AMDILEVERGREENDEVICE_H_
#include "AMDILDevice.h"
#include "AMDGPUSubtarget.h"
namespace llvm {
class AMDGPUSubtarget;
//===----------------------------------------------------------------------===//
// Evergreen generation of devices and their respective sub classes
//===----------------------------------------------------------------------===//
// The AMDGPUEvergreenDevice is the base device class for all of the Evergreen
// series of cards. This class contains information required to differentiate
// the Evergreen device from the generic AMDGPUDevice. This device represents
// the capabilities of the 'Juniper' cards, also known as the HD57XX.
class AMDGPUEvergreenDevice : public AMDGPUDevice {
public:
AMDGPUEvergreenDevice(AMDGPUSubtarget *ST);
virtual ~AMDGPUEvergreenDevice();
virtual size_t getMaxLDSSize() const;
virtual size_t getMaxGDSSize() const;
virtual size_t getWavefrontSize() const;
virtual uint32_t getGeneration() const;
virtual uint32_t getMaxNumUAVs() const;
virtual uint32_t getResourceID(uint32_t) const;
protected:
virtual void setCaps();
}; // AMDGPUEvergreenDevice
// The AMDGPUCypressDevice is similar to the AMDGPUEvergreenDevice, except it
// has support for double precision operations. This device is used to
// represent both the Cypress and Hemlock cards, which are commercially known
// as the HD58XX and HD59XX cards.
class AMDGPUCypressDevice : public AMDGPUEvergreenDevice {
public:
AMDGPUCypressDevice(AMDGPUSubtarget *ST);
virtual ~AMDGPUCypressDevice();
private:
virtual void setCaps();
}; // AMDGPUCypressDevice
// The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based
// devices. This class differs from the base AMDGPUEvergreenDevice in that the
// device is a ~quarter of the 'Juniper'. These are commercially known as the
// HD54XX and HD53XX series of cards.
class AMDGPUCedarDevice : public AMDGPUEvergreenDevice {
public:
AMDGPUCedarDevice(AMDGPUSubtarget *ST);
virtual ~AMDGPUCedarDevice();
virtual size_t getWavefrontSize() const;
private:
virtual void setCaps();
}; // AMDGPUCedarDevice
// The AMDGPURedwoodDevice is the class that represents all of the 'Redwood'
// based devices. This class differs from the base class in that these devices
// are considered about half of a 'Juniper' device. These are commercially
// known as the HD55XX and HD56XX series of cards.
class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice {
public:
AMDGPURedwoodDevice(AMDGPUSubtarget *ST);
virtual ~AMDGPURedwoodDevice();
virtual size_t getWavefrontSize() const;
private:
virtual void setCaps();
}; // AMDGPURedwoodDevice
} // namespace llvm
#endif // _AMDILEVERGREENDEVICE_H_

View file

@ -1,53 +0,0 @@
//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// Interface to describe a layout of a stack frame on an AMDIL target machine
//
//===----------------------------------------------------------------------===//
#include "AMDILFrameLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
using namespace llvm;
AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
int LAO, unsigned TransAl)
: TargetFrameLowering(D, StackAl, LAO, TransAl)
{
}
AMDGPUFrameLowering::~AMDGPUFrameLowering()
{
}
/// getFrameIndexOffset - Returns the displacement from the frame register to
/// the stack frame of the specified index.
int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int FI) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MFI->getObjectOffset(FI);
}
const TargetFrameLowering::SpillSlot *
AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const
{
NumEntries = 0;
return 0;
}
void
AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const
{
}
void
AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
{
}
bool
AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const
{
return false;
}

View file

@ -1,46 +0,0 @@
//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Interface to describe a layout of a stack frame on an AMDIL target machine
//
//===----------------------------------------------------------------------===//
#ifndef _AMDILFRAME_LOWERING_H_
#define _AMDILFRAME_LOWERING_H_
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetFrameLowering.h"
/// Information about the stack frame layout on the AMDGPU targets. It holds
/// the direction of the stack growth, the known stack alignment on entry to
/// each function, and the offset to the locals area.
/// See TargetFrameInfo for more comments.
namespace llvm {
class AMDGPUFrameLowering : public TargetFrameLowering {
public:
AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
TransAl = 1);
virtual ~AMDGPUFrameLowering();
virtual int getFrameIndexOffset(const MachineFunction &MF,
int FI) const;
virtual const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const;
virtual void emitPrologue(MachineFunction &MF) const;
virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
virtual bool hasFP(const MachineFunction &MF) const;
}; // class AMDGPUFrameLowering
} // namespace llvm
#endif // _AMDILFRAME_LOWERING_H_

View file

@ -1,395 +0,0 @@
//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AMDIL target.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "AMDILDevices.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Compiler.h"
#include <list>
#include <queue>
using namespace llvm;
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// AMDGPUDAGToDAGISel - AMDGPU specific code to select AMDGPU machine
// instructions for SelectionDAG operations.
//
namespace {
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a reference to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const AMDGPUSubtarget &Subtarget;
public:
AMDGPUDAGToDAGISel(TargetMachine &TM);
virtual ~AMDGPUDAGToDAGISel();
SDNode *Select(SDNode *N);
virtual const char *getPassName() const;
private:
inline SDValue getSmallIPtrImm(unsigned Imm);
// Complex pattern selectors
bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
static bool checkType(const Value *ptr, unsigned int addrspace);
static const Value *getBasePointerValue(const Value *V);
static bool isGlobalStore(const StoreSDNode *N);
static bool isPrivateStore(const StoreSDNode *N);
static bool isLocalStore(const StoreSDNode *N);
static bool isRegionStore(const StoreSDNode *N);
static bool isCPLoad(const LoadSDNode *N);
static bool isConstantLoad(const LoadSDNode *N, int cbID);
static bool isGlobalLoad(const LoadSDNode *N);
static bool isPrivateLoad(const LoadSDNode *N);
static bool isLocalLoad(const LoadSDNode *N);
static bool isRegionLoad(const LoadSDNode *N);
bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace
// createAMDGPUISelDag - This pass converts a legalized DAG into an
// AMDGPU-specific DAG, ready for instruction scheduling.
//
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
return new AMDGPUDAGToDAGISel(TM);
}
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
: SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>())
{
}
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
bool AMDGPUDAGToDAGISel::SelectADDRParam(
SDValue Addr, SDValue& R1, SDValue& R2) {
if (Addr.getOpcode() == ISD::FrameIndex) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
R2 = CurDAG->getTargetConstant(0, MVT::i32);
} else {
R1 = Addr;
R2 = CurDAG->getTargetConstant(0, MVT::i32);
}
} else if (Addr.getOpcode() == ISD::ADD) {
R1 = Addr.getOperand(0);
R2 = Addr.getOperand(1);
} else {
R1 = Addr;
R2 = CurDAG->getTargetConstant(0, MVT::i32);
}
return true;
}
bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress) {
return false;
}
return SelectADDRParam(Addr, R1, R2);
}
bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress) {
return false;
}
if (Addr.getOpcode() == ISD::FrameIndex) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
R2 = CurDAG->getTargetConstant(0, MVT::i64);
} else {
R1 = Addr;
R2 = CurDAG->getTargetConstant(0, MVT::i64);
}
} else if (Addr.getOpcode() == ISD::ADD) {
R1 = Addr.getOperand(0);
R2 = Addr.getOperand(1);
} else {
R1 = Addr;
R2 = CurDAG->getTargetConstant(0, MVT::i64);
}
return true;
}
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
return NULL; // Already selected.
}
switch (Opc) {
default: break;
case ISD::FrameIndex:
{
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
unsigned int FI = FIN->getIndex();
EVT OpVT = N->getValueType(0);
unsigned int NewOpc = AMDGPU::COPY;
SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
}
}
break;
}
return SelectCode(N);
}
bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
if (!ptr) {
return false;
}
Type *ptrType = ptr->getType();
return cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
}
const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V)
{
if (!V) {
return NULL;
}
const Value *ret = NULL;
ValueMap<const Value *, bool> ValueBitMap;
std::queue<const Value *, std::list<const Value *> > ValueQueue;
ValueQueue.push(V);
while (!ValueQueue.empty()) {
V = ValueQueue.front();
if (ValueBitMap.find(V) == ValueBitMap.end()) {
ValueBitMap[V] = true;
if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
ret = V;
break;
} else if (dyn_cast<GlobalVariable>(V)) {
ret = V;
break;
} else if (dyn_cast<Constant>(V)) {
const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
if (CE) {
ValueQueue.push(CE->getOperand(0));
}
} else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
ret = AI;
break;
} else if (const Instruction *I = dyn_cast<Instruction>(V)) {
uint32_t numOps = I->getNumOperands();
for (uint32_t x = 0; x < numOps; ++x) {
ValueQueue.push(I->getOperand(x));
}
} else {
// assert(0 && "Found a Value that we didn't know how to handle!");
}
}
ValueQueue.pop();
}
return ret;
}
bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
&& !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
&& !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
}
bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
return true;
}
// Test the memory operand for null before dereferencing it.
MachineMemOperand *MMO = N->getMemOperand();
if (!MMO || !MMO->getValue()) {
return false;
}
const Value *V = MMO->getValue();
const Value *BV = getBasePointerValue(V);
if (isa<GlobalValue>(V) || (BV && isa<GlobalValue>(BV))) {
return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
}
return false;
}
bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
MachineMemOperand *MMO = N->getMemOperand();
if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
if (MMO) {
const Value *V = MMO->getValue();
const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
return true;
}
}
}
return false;
}
bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
// Check to make sure we are not a constant pool load or a constant load
// that is marked as a private load
if (isCPLoad(N) || isConstantLoad(N, -1)) {
return false;
}
}
if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
&& !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
&& !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
&& !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
&& !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
&& !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS))
{
return true;
}
return false;
}
const char *AMDGPUDAGToDAGISel::getPassName() const {
return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
#ifdef DEBUGTMP
#undef INT64_C
#endif
#undef DEBUGTMP
///==== AMDGPU Functions ====///
bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
SDValue& Offset) {
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress) {
return false;
}
if (Addr.getOpcode() == ISD::ADD) {
bool Match = false;
// Find the base ptr and the offset
for (unsigned i = 0; i < Addr.getNumOperands(); i++) {
SDValue Arg = Addr.getOperand(i);
ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg);
// This arg isn't a constant so it must be the base PTR.
if (!OffsetNode) {
Base = Addr.getOperand(i);
continue;
}
// Check if the constant argument fits in 8-bits. The offset is in bytes
// so we need to convert it to dwords.
if (isUInt<8>(OffsetNode->getZExtValue() >> 2)) {
Match = true;
Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2,
MVT::i32);
}
}
return Match;
}
// Default case, no offset
Base = Addr;
Offset = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
SDValue &Offset)
{
ConstantSDNode * IMMOffset;
if (Addr.getOpcode() == ISD::ADD
&& (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
&& isInt<16>(IMMOffset->getZExtValue())) {
Base = Addr.getOperand(0);
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
return true;
// If the pointer address is constant, we can move it to the offset field.
} else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
&& isInt<16>(IMMOffset->getZExtValue())) {
Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
CurDAG->getEntryNode().getDebugLoc(),
AMDGPU::ZERO, MVT::i32);
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
return true;
}
// Default case, no offset
Base = Addr;
Offset = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
SDValue& Offset) {
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress ||
Addr.getOpcode() != ISD::ADD) {
return false;
}
Base = Addr.getOperand(0);
Offset = Addr.getOperand(1);
return true;
}

View file

@ -1,677 +0,0 @@
//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file contains TargetLowering functions borrowed from AMDIL.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUISelLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
void AMDGPUTargetLowering::InitAMDILLowering()
{
int types[] =
{
(int)MVT::i8,
(int)MVT::i16,
(int)MVT::i32,
(int)MVT::f32,
(int)MVT::f64,
(int)MVT::i64,
(int)MVT::v2i8,
(int)MVT::v4i8,
(int)MVT::v2i16,
(int)MVT::v4i16,
(int)MVT::v4f32,
(int)MVT::v4i32,
(int)MVT::v2f32,
(int)MVT::v2i32,
(int)MVT::v2f64,
(int)MVT::v2i64
};
int IntTypes[] =
{
(int)MVT::i8,
(int)MVT::i16,
(int)MVT::i32,
(int)MVT::i64
};
int FloatTypes[] =
{
(int)MVT::f32,
(int)MVT::f64
};
int VectorTypes[] =
{
(int)MVT::v2i8,
(int)MVT::v4i8,
(int)MVT::v2i16,
(int)MVT::v4i16,
(int)MVT::v4f32,
(int)MVT::v4i32,
(int)MVT::v2f32,
(int)MVT::v2i32,
(int)MVT::v2f64,
(int)MVT::v2i64
};
size_t numTypes = sizeof(types) / sizeof(*types);
size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
// These are the current register classes that are
// supported
for (unsigned int x = 0; x < numTypes; ++x) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
//FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
// We cannot sextinreg, expand to shifts
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
setOperationAction(ISD::SUBE, VT, Expand);
setOperationAction(ISD::SUBC, VT, Expand);
setOperationAction(ISD::ADDE, VT, Expand);
setOperationAction(ISD::ADDC, VT, Expand);
setOperationAction(ISD::BRCOND, VT, Custom);
setOperationAction(ISD::BR_JT, VT, Expand);
setOperationAction(ISD::BRIND, VT, Expand);
// TODO: Implement custom UREM/SREM routines
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
if (VT != MVT::i64 && VT != MVT::v2i64) {
setOperationAction(ISD::SDIV, VT, Custom);
}
}
for (unsigned int x = 0; x < numFloatTypes; ++x) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
// IL does not have these operations for floating point types
setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
setOperationAction(ISD::SETOLT, VT, Expand);
setOperationAction(ISD::SETOGE, VT, Expand);
setOperationAction(ISD::SETOGT, VT, Expand);
setOperationAction(ISD::SETOLE, VT, Expand);
setOperationAction(ISD::SETULT, VT, Expand);
setOperationAction(ISD::SETUGE, VT, Expand);
setOperationAction(ISD::SETUGT, VT, Expand);
setOperationAction(ISD::SETULE, VT, Expand);
}
for (unsigned int x = 0; x < numIntTypes; ++x) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
// The GPU also has no divrem instruction for signed or unsigned types
setOperationAction(ISD::SDIVREM, VT, Expand);
// GPU does not have [S|U]MUL_LOHI functions as a single instruction
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
// GPU doesn't have a rotl, rotr, or byteswap instruction
setOperationAction(ISD::ROTR, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
// GPU doesn't have any counting operators
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
}
for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
// setOperationAction(ISD::VSETCC, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
}
if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
setOperationAction(ISD::MULHU, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
setOperationAction(ISD::MULHS, MVT::i64, Expand);
setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
setOperationAction(ISD::ADD, MVT::v2i64, Expand);
setOperationAction(ISD::SREM, MVT::v2i64, Expand);
setOperationAction(ISD::Constant , MVT::i64 , Legal);
setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
}
if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
// we support loading/storing v2f64 but not operations on the type
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
// We want to expand vector conversions into their scalar
// counterparts.
setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
}
// TODO: Fix the UDIV24 algorithm so it works for these
// types correctly. This needs vector comparisons
// for this to work correctly.
setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
setOperationAction(ISD::SUBC, MVT::Other, Expand);
setOperationAction(ISD::ADDE, MVT::Other, Expand);
setOperationAction(ISD::ADDC, MVT::Other, Expand);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
// Use the default implementation.
setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
setOperationAction(ISD::Constant , MVT::i32 , Legal);
setSchedulingPreference(Sched::RegPressure);
setPow2DivIsCheap(false);
setPrefLoopAlignment(16);
setSelectIsExpensive(true);
setJumpIsExpensive(true);
maxStoresPerMemcpy = 4096;
maxStoresPerMemmove = 4096;
maxStoresPerMemset = 4096;
}
bool
AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I, unsigned Intrinsic) const
{
return false;
}
// The backend supports 32 and 64 bit floating point immediates
bool
AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
return VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
|| VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64;
}
bool
AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
return VT.getScalarType().getSimpleVT().SimpleTy != MVT::f32
&& VT.getScalarType().getSimpleVT().SimpleTy != MVT::f64;
}
// computeMaskedBitsForTargetNode - Determine which bits of Op are known to
// be zero or one. Op is expected to be a target specific node. Used by DAG
// combiner.
void
AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const
{
APInt KnownZero2;
APInt KnownOne2;
KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
switch (Op.getOpcode()) {
default: break;
case ISD::SELECT_CC:
// The result of a SELECT_CC is one of the two selected values
// (operands 2 and 3), so compute the known bits of those.
DAG.ComputeMaskedBits(
Op.getOperand(2),
KnownZero,
KnownOne,
Depth + 1
);
DAG.ComputeMaskedBits(
Op.getOperand(3),
KnownZero2,
KnownOne2,
Depth + 1
);
assert((KnownZero & KnownOne) == 0
&& "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0
&& "Bits known to be one AND zero?");
// Only known if known in both the LHS and RHS
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
break;
}
}
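// A toy illustration (types and helper assumed, not from this backend) of
// the merge rule above: after a select, a bit is known only if it is known
// the same way in both selected values.
#include <cstdint>
struct KnownBits32 { uint32_t Zero, One; };
static KnownBits32 mergeSelectKnownBits(KnownBits32 A, KnownBits32 B) {
KnownBits32 R;
R.Zero = A.Zero & B.Zero; // known zero in both operands
R.One = A.One & B.One; // known one in both operands
return R;
}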
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
SDValue
AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
EVT OVT = Op.getValueType();
SDValue DST;
if (OVT.getScalarType() == MVT::i64) {
DST = LowerSDIV64(Op, DAG);
} else if (OVT.getScalarType() == MVT::i32) {
DST = LowerSDIV32(Op, DAG);
} else if (OVT.getScalarType() == MVT::i16
|| OVT.getScalarType() == MVT::i8) {
DST = LowerSDIV24(Op, DAG);
} else {
DST = SDValue(Op.getNode(), 0);
}
return DST;
}
SDValue
AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
EVT OVT = Op.getValueType();
SDValue DST;
if (OVT.getScalarType() == MVT::i64) {
DST = LowerSREM64(Op, DAG);
} else if (OVT.getScalarType() == MVT::i32) {
DST = LowerSREM32(Op, DAG);
} else if (OVT.getScalarType() == MVT::i16) {
DST = LowerSREM16(Op, DAG);
} else if (OVT.getScalarType() == MVT::i8) {
DST = LowerSREM8(Op, DAG);
} else {
DST = SDValue(Op.getNode(), 0);
}
return DST;
}
SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
SDValue Data = Op.getOperand(0);
VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
DebugLoc DL = Op.getDebugLoc();
EVT DVT = Data.getValueType();
EVT BVT = BaseType->getVT();
unsigned baseBits = BVT.getScalarType().getSizeInBits();
unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
unsigned shiftBits = srcBits - baseBits;
if (srcBits < 32) {
// If the op is less than 32 bits, it needs to be extended to 32 bits
// so it can properly keep the upper bits valid.
EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
shiftBits = 32 - baseBits;
DVT = IVT;
}
SDValue Shift = DAG.getConstant(shiftBits, DVT);
// Shift left by 'Shift' bits.
Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
// Signed shift Right by 'Shift' bits.
Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
if (srcBits < 32) {
// Once the sign extension is done, the op needs to be converted to
// its original type.
Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
}
return Data;
}
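// A scalar sketch (assumed helper, for illustration only) of the shift-based
// sign extension built above: shift the BaseBits-wide payload to the top of
// the 32-bit lane, then arithmetic-shift it back so the sign bit replicates.
#include <cstdint>
static int32_t signExtendInReg(int32_t Value, unsigned BaseBits) {
unsigned ShiftBits = 32u - BaseBits; // e.g. 24 when BaseBits == 8
int32_t Shifted = (int32_t)((uint32_t)Value << ShiftBits); // the SHL node
return Shifted >> ShiftBits; // the SRA node replicates the sign bit
}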
EVT
AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
int iSize = (size * numEle);
int vEle = (iSize >> ((size == 64) ? 6 : 5));
if (!vEle) {
vEle = 1;
}
if (size == 64) {
if (vEle == 1) {
return EVT(MVT::i64);
} else {
return EVT(MVT::getVectorVT(MVT::i64, vEle));
}
} else {
if (vEle == 1) {
return EVT(MVT::i32);
} else {
return EVT(MVT::getVectorVT(MVT::i32, vEle));
}
}
}
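// A stand-alone restatement (hypothetical helper) of genIntType's packing
// arithmetic: the total payload of size * numEle bits is repacked into
// 32-bit (or 64-bit) lanes, with a minimum of one lane. For example,
// (8, 4) packs into a single i32, while (64, 2) stays two i64 lanes.
static unsigned packedLaneCount(unsigned Size, unsigned NumEle) {
unsigned TotalBits = Size * NumEle;
unsigned Lanes = TotalBits >> ((Size == 64) ? 6 : 5); // divide by 64 or 32
return Lanes ? Lanes : 1;
}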
SDValue
AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
SDValue Jump = Op.getOperand(2);
SDValue Result;
Result = DAG.getNode(
AMDGPUISD::BRANCH_COND,
Op.getDebugLoc(),
Op.getValueType(),
Chain, Jump, Cond);
return Result;
}
SDValue
AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT OVT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
MVT INTTY;
MVT FLTTY;
if (!OVT.isVector()) {
INTTY = MVT::i32;
FLTTY = MVT::f32;
} else if (OVT.getVectorNumElements() == 2) {
INTTY = MVT::v2i32;
FLTTY = MVT::v2f32;
} else if (OVT.getVectorNumElements() == 4) {
INTTY = MVT::v4i32;
FLTTY = MVT::v4f32;
}
unsigned bitsize = OVT.getScalarType().getSizeInBits();
// char|short jq = ia ^ ib;
SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
// jq = jq >> (bitsize - 2)
jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
// jq = jq | 0x1
jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
// jq = (int)jq
jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
// int ia = (int)LHS;
SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
// int ib = (int)RHS;
SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
// float fa = (float)ia;
SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
// float fb = (float)ib;
SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
// float fq = native_divide(fa, fb);
SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
// fq = trunc(fq);
fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
// float fqneg = -fq;
SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
// float fr = mad(fqneg, fb, fa);
SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
// int iq = (int)fq;
SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
// fr = fabs(fr);
fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
// fb = fabs(fb);
fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
// int cv = fr >= fb;
SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
// jq = (cv ? jq : 0);
jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
DAG.getConstant(0, OVT));
// dst = iq + jq;
iq = DAG.getSExtOrTrunc(iq, DL, OVT);
iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
return iq;
}
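// A scalar C sketch (assumed; it mirrors the step-by-step comments above) of
// the float-based signed division used for 8- and 16-bit types. The float
// quotient is truncated, then corrected by +/-1 when the remainder check
// shows it was one step short; this stays exact while the operands fit in
// 24 bits. Here the XOR is done in 32 bits, so the sign shift is 30 rather
// than bitsize - 2.
#include <cmath>
#include <cstdint>
static int32_t sdiv24(int32_t ia, int32_t ib) {
int32_t jq = ((ia ^ ib) >> 30) | 1; // sign of the exact quotient: +1 or -1
float fa = (float)ia;
float fb = (float)ib;
float fq = std::trunc(fa / fb); // approximate quotient
float fr = std::fabs(fa - fq * fb); // magnitude of that guess's remainder
int32_t iq = (int32_t)fq;
return iq + ((fr >= std::fabs(fb)) ? jq : 0); // one-step correction
}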
SDValue
AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT OVT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
// The LowerSDIV32 function generates code equivalent to the following IL.
// mov r0, LHS
// mov r1, RHS
// ilt r10, r0, 0
// ilt r11, r1, 0
// iadd r0, r0, r10
// iadd r1, r1, r11
// ixor r0, r0, r10
// ixor r1, r1, r11
// udiv r0, r0, r1
// ixor r10, r10, r11
// iadd r0, r0, r10
// ixor DST, r0, r10
// mov r0, LHS
SDValue r0 = LHS;
// mov r1, RHS
SDValue r1 = RHS;
// ilt r10, r0, 0
SDValue r10 = DAG.getSelectCC(DL,
r0, DAG.getConstant(0, OVT),
DAG.getConstant(-1, MVT::i32),
DAG.getConstant(0, MVT::i32),
ISD::SETLT);
// ilt r11, r1, 0
SDValue r11 = DAG.getSelectCC(DL,
r1, DAG.getConstant(0, OVT),
DAG.getConstant(-1, MVT::i32),
DAG.getConstant(0, MVT::i32),
ISD::SETLT);
// iadd r0, r0, r10
r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
// iadd r1, r1, r11
r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
// ixor r0, r0, r10
r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
// ixor r1, r1, r11
r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
// udiv r0, r0, r1
r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
// ixor r10, r10, r11
r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
// iadd r0, r0, r10
r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
// ixor DST, r0, r10
SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
return DST;
}
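// A hedged scalar restatement (assumed helper) of the IL sequence above:
// fold out the signs with the add/xor absolute-value trick, divide unsigned,
// then reapply the quotient's sign (sign(a) xor sign(b)) the same way.
// Unsigned arithmetic keeps the add/xor steps well defined in C++.
#include <cstdint>
static int32_t sdiv32ViaUdiv(int32_t a, int32_t b) {
uint32_t sa = (uint32_t)(a >> 31); // ilt r10: all-ones if a < 0, else 0
uint32_t sb = (uint32_t)(b >> 31); // ilt r11: all-ones if b < 0, else 0
uint32_t ua = ((uint32_t)a + sa) ^ sa; // iadd + ixor: |a|
uint32_t ub = ((uint32_t)b + sb) ^ sb; // iadd + ixor: |b|
uint32_t uq = ua / ub; // udiv r0, r0, r1
uint32_t sq = sa ^ sb; // ixor r10, r10, r11: sign of the quotient
return (int32_t)((uq + sq) ^ sq); // iadd + ixor: restore the sign
}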
SDValue
AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
return SDValue(Op.getNode(), 0);
}
SDValue
AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT OVT = Op.getValueType();
MVT INTTY = MVT::i32;
if (OVT == MVT::v2i8) {
INTTY = MVT::v2i32;
} else if (OVT == MVT::v4i8) {
INTTY = MVT::v4i32;
}
SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
return LHS;
}
SDValue
AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT OVT = Op.getValueType();
MVT INTTY = MVT::i32;
if (OVT == MVT::v2i16) {
INTTY = MVT::v2i32;
} else if (OVT == MVT::v4i16) {
INTTY = MVT::v4i32;
}
SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
return LHS;
}
SDValue
AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT OVT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
// The LowerSREM32 function generates code equivalent to the following IL.
// mov r0, LHS
// mov r1, RHS
// ilt r10, r0, 0
// ilt r11, r1, 0
// iadd r0, r0, r10
// iadd r1, r1, r11
// ixor r0, r0, r10
// ixor r1, r1, r11
// udiv r20, r0, r1
// umul r20, r20, r1
// sub r0, r0, r20
// iadd r0, r0, r10
// ixor DST, r0, r10
// mov r0, LHS
SDValue r0 = LHS;
// mov r1, RHS
SDValue r1 = RHS;
// ilt r10, r0, 0
SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
// ilt r11, r1, 0
SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
// iadd r0, r0, r10
r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
// iadd r1, r1, r11
r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
// ixor r0, r0, r10
r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
// ixor r1, r1, r11
r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
// udiv r20, r0, r1
SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
// umul r20, r20, r1
r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
// sub r0, r0, r20
r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
// iadd r0, r0, r10
r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
// ixor DST, r0, r10
SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
return DST;
}
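// The matching scalar sketch (assumed helper, as above) for signed
// remainder: compute |a| - (|a| / |b|) * |b| unsigned, then give the result
// the sign of the dividend. Note that only r10 is folded back in above,
// which is exactly C's rule that a % b takes the sign of a.
#include <cstdint>
static int32_t srem32ViaUdiv(int32_t a, int32_t b) {
uint32_t sa = (uint32_t)(a >> 31); // all-ones if a < 0, else 0
uint32_t sb = (uint32_t)(b >> 31); // all-ones if b < 0, else 0
uint32_t ua = ((uint32_t)a + sa) ^ sa; // |a|
uint32_t ub = ((uint32_t)b + sb) ^ sb; // |b|
uint32_t ur = ua - (ua / ub) * ub; // udiv + umul + sub
return (int32_t)((ur + sa) ^ sa); // remainder takes the dividend's sign
}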
SDValue
AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
return SDValue(Op.getNode(), 0);
}

View file

@ -1,270 +0,0 @@
//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file describes the AMDIL instructions in TableGen format.
//
//===----------------------------------------------------------------------===//
// AMDIL Instruction Predicate Definitions
// Predicate that is set to true if the hardware supports double precision
// divide
def HasHWDDiv : Predicate<"Subtarget.device()"
"->getGeneration() > AMDGPUDeviceInfo::HD4XXX && "
"Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
// Predicate that is set to true if the hardware supports double, but not double
// precision divide in hardware
def HasSWDDiv : Predicate<"Subtarget.device()"
"->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
"Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
// Predicate that is set to true if the hardware supports 24-bit signed
// math ops. Otherwise a software expansion to 32-bit math ops is used instead.
def HasHWSign24Bit : Predicate<"Subtarget.device()"
"->getGeneration() > AMDGPUDeviceInfo::HD5XXX">;
// Predicate that is set to true if 64-bit operations are supported
def HasHW64Bit : Predicate<"Subtarget.device()"
"->usesHardware(AMDGPUDeviceInfo::LongOps)">;
def HasSW64Bit : Predicate<"Subtarget.device()"
"->usesSoftware(AMDGPUDeviceInfo::LongOps)">;
// Predicate that is set to true if the timer register is supported
def HasTmrRegister : Predicate<"Subtarget.device()"
"->isSupported(AMDGPUDeviceInfo::TmrReg)">;
// Predicate that is true if we are at least the Evergreen series
def HasDeviceIDInst : Predicate<"Subtarget.device()"
"->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">;
// Predicate that is true if we have region address space.
def hasRegionAS : Predicate<"Subtarget.device()"
"->usesHardware(AMDGPUDeviceInfo::RegionMem)">;
// Predicate that is true if we do not have the region address space.
def noRegionAS : Predicate<"!Subtarget.device()"
"->isSupported(AMDGPUDeviceInfo::RegionMem)">;
// Predicate that is set to true if 64-bit multiply is supported in the IL
def HasHW64Mul : Predicate<"Subtarget.calVersion()"
">= CAL_VERSION_SC_139"
"&& Subtarget.device()"
"->getGeneration() >="
"AMDGPUDeviceInfo::HD5XXX">;
def HasSW64Mul : Predicate<"Subtarget.calVersion()"
"< CAL_VERSION_SC_139">;
// Predicate that is set to true if 64-bit div/mod is supported in the IL
def HasHW64DivMod : Predicate<"Subtarget.device()"
"->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">;
def HasSW64DivMod : Predicate<"Subtarget.device()"
"->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">;
// Predicate that is set to true if 64-bit pointers are used.
def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;
//===--------------------------------------------------------------------===//
// Custom Selection DAG Type Profiles
//===--------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Generic Profile Types
//===----------------------------------------------------------------------===//
def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
]>;
def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
]>;
def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
SDTCisEltOfVec<1, 0>
]>;
//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//
// Conditional branch: operand 0 is the target basic block, operand 1 the
// condition register
def SDTIL_BRCond : SDTypeProfile<0, 2, [
SDTCisVT<0, OtherVT>
]>;
//===--------------------------------------------------------------------===//
// Custom Selection DAG Nodes
//===--------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Flow Control DAG Nodes
//===----------------------------------------------------------------------===//
def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
//===----------------------------------------------------------------------===//
// Call/Return DAG Nodes
//===----------------------------------------------------------------------===//
def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
//===--------------------------------------------------------------------===//
// Instructions
//===--------------------------------------------------------------------===//
// Floating point math functions
def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>;
//===----------------------------------------------------------------------===//
// Integer functions
//===----------------------------------------------------------------------===//
def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
//===--------------------------------------------------------------------===//
// Custom Pattern DAG Nodes
//===--------------------------------------------------------------------===//
def global_store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;
//===----------------------------------------------------------------------===//
// Load pattern fragments
//===----------------------------------------------------------------------===//
// Global address space loads
def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
// Constant address space loads
def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
//===----------------------------------------------------------------------===//
// Complex addressing mode patterns
//===----------------------------------------------------------------------===//
def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
//===----------------------------------------------------------------------===//
// Instruction format classes
//===----------------------------------------------------------------------===//
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {
let Namespace = "AMDGPU";
dag OutOperandList = outs;
dag InOperandList = ins;
let Pattern = pattern;
let AsmString = !strconcat(asmstr, "\n");
let isPseudo = 1;
let Itinerary = NullALU;
bit hasIEEEFlag = 0;
bit hasZeroOpFlag = 0;
}
//===--------------------------------------------------------------------===//
// Multiclass Instruction formats
//===--------------------------------------------------------------------===//
// Multiclass that handles branch instructions
multiclass BranchConditional<SDNode Op> {
def _i32 : ILFormat<(outs),
(ins brtarget:$target, GPRI32:$src0),
"; i32 Pseudo branch instruction",
[(Op bb:$target, GPRI32:$src0)]>;
def _f32 : ILFormat<(outs),
(ins brtarget:$target, GPRF32:$src0),
"; f32 Pseudo branch instruction",
[(Op bb:$target, GPRF32:$src0)]>;
}
// Only scalar types should generate flow control
multiclass BranchInstr<string name> {
def _i32 : ILFormat<(outs), (ins GPRI32:$src),
!strconcat(name, " $src"), []>;
def _f32 : ILFormat<(outs), (ins GPRF32:$src),
!strconcat(name, " $src"), []>;
}
// Only scalar types should generate flow control
multiclass BranchInstr2<string name> {
def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1),
!strconcat(name, " $src0, $src1"), []>;
def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1),
!strconcat(name, " $src0, $src1"), []>;
}
//===--------------------------------------------------------------------===//
// Intrinsics support
//===--------------------------------------------------------------------===//
include "AMDILIntrinsics.td"
//===--------------------------------------------------------------------===//
// Instructions support
//===--------------------------------------------------------------------===//
//===---------------------------------------------------------------------===//
// Custom inserter for branches and returns; this will eventually be a
// separate pass
//===---------------------------------------------------------------------===//
let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
def BRANCH : ILFormat<(outs), (ins brtarget:$target),
"; Pseudo unconditional branch instruction",
[(br bb:$target)]>;
defm BRANCH_COND : BranchConditional<IL_brcond>;
}
//===---------------------------------------------------------------------===//
// Flow and Program control Instructions
//===---------------------------------------------------------------------===//
let isTerminator=1 in {
def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
!strconcat("SWITCH", " $src"), []>;
def CASE : ILFormat< (outs), (ins GPRI32:$src),
!strconcat("CASE", " $src"), []>;
def BREAK : ILFormat< (outs), (ins),
"BREAK", []>;
def CONTINUE : ILFormat< (outs), (ins),
"CONTINUE", []>;
def DEFAULT : ILFormat< (outs), (ins),
"DEFAULT", []>;
def ELSE : ILFormat< (outs), (ins),
"ELSE", []>;
def ENDSWITCH : ILFormat< (outs), (ins),
"ENDSWITCH", []>;
def ENDMAIN : ILFormat< (outs), (ins),
"ENDMAIN", []>;
def END : ILFormat< (outs), (ins),
"END", []>;
def ENDFUNC : ILFormat< (outs), (ins),
"ENDFUNC", []>;
def ENDIF : ILFormat< (outs), (ins),
"ENDIF", []>;
def WHILELOOP : ILFormat< (outs), (ins),
"WHILE", []>;
def ENDLOOP : ILFormat< (outs), (ins),
"ENDLOOP", []>;
def FUNC : ILFormat< (outs), (ins),
"FUNC", []>;
def RETDYN : ILFormat< (outs), (ins),
"RET_DYN", []>;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
defm IFC : BranchInstr2<"IFC">;
defm BREAKC : BranchInstr2<"BREAKC">;
defm CONTINUEC : BranchInstr2<"CONTINUEC">;
}

View file

@ -1,93 +0,0 @@
//===- AMDILIntrinsicInfo.cpp - AMDIL Intrinsic Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file contains the AMDIL Implementation of the IntrinsicInfo class.
//
//===-----------------------------------------------------------------------===//
#include "AMDILIntrinsicInfo.h"
#include "AMDIL.h"
#include "AMDGPUSubtarget.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Intrinsics.h"
#include "llvm/Module.h"
using namespace llvm;
#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
#include "AMDGPUGenIntrinsics.inc"
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
: TargetIntrinsicInfo()
{
}
std::string
AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
unsigned int numTys) const
{
static const char* const names[] = {
#define GET_INTRINSIC_NAME_TABLE
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_NAME_TABLE
};
//assert(!isOverloaded(IntrID)
//&& "AMDGPU Intrinsics are not overloaded");
if (IntrID < Intrinsic::num_intrinsics) {
// Not an AMDGPU intrinsic; return an empty name rather than constructing
// a std::string from a null pointer.
return "";
}
assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
&& "Invalid intrinsic ID");
std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
return Result;
}
unsigned int
AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const
{
#define GET_FUNCTION_RECOGNIZER
#include "AMDGPUGenIntrinsics.inc"
#undef GET_FUNCTION_RECOGNIZER
AMDGPUIntrinsic::ID IntrinsicID
= (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", Name);
if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
return IntrinsicID;
}
return 0;
}
bool
AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const
{
// Overload Table
#define GET_INTRINSIC_OVERLOAD_TABLE
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_OVERLOAD_TABLE
}
/// This defines the "getAttributes(ID id)" method.
#define GET_INTRINSIC_ATTRIBUTES
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_ATTRIBUTES
Function*
AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
Type **Tys,
unsigned numTys) const
{
// Silence an unused-variable warning.
AttrListPtr List = getAttributes((AMDGPUIntrinsic::ID)IntrID);
(void)List;
assert(!"Not implemented");
return 0; // Unreachable with assertions enabled; avoids falling off the end.
}

View file

@ -1,47 +0,0 @@
//===- AMDILIntrinsicInfo.h - AMDIL Intrinsic Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// Interface for the AMDIL Implementation of the Intrinsic Info class.
//
//===-----------------------------------------------------------------------===//
#ifndef _AMDIL_INTRINSICS_H_
#define _AMDIL_INTRINSICS_H_
#include "llvm/Intrinsics.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
namespace llvm {
class TargetMachine;
namespace AMDGPUIntrinsic {
enum ID {
last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
#define GET_INTRINSIC_ENUM_VALUES
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_ENUM_VALUES
, num_AMDGPU_intrinsics
};
}
class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
public:
AMDGPUIntrinsicInfo(TargetMachine *tm);
std::string getName(unsigned int IntrId, Type **Tys = 0,
unsigned int numTys = 0) const;
unsigned int lookupName(const char *Name, unsigned int Len) const;
bool isOverloaded(unsigned int IID) const;
Function *getDeclaration(Module *M, unsigned int ID,
Type **Tys = 0,
unsigned int numTys = 0) const;
}; // AMDGPUIntrinsicInfo
}
#endif // _AMDIL_INTRINSICS_H_

View file

@ -1,242 +0,0 @@
//===- AMDILIntrinsics.td - Defines AMDIL Intrinsics -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file defines all of the AMDIL-specific intrinsics
//
//===---------------------------------------------------------------===//
//===--------------------------------------------------------------------===//
// Intrinsic classes
// Generic intrinsic classes, used here for target-specific intrinsics
// instead of SDNode patterns.
//===--------------------------------------------------------------------===//
let TargetPrefix = "AMDIL", isTarget = 1 in {
class VoidIntLong :
Intrinsic<[llvm_i64_ty], [], []>;
class VoidIntInt :
Intrinsic<[llvm_i32_ty], [], []>;
class VoidIntBool :
Intrinsic<[llvm_i32_ty], [], []>;
class UnaryIntInt :
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class UnaryIntFloat :
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class ConvertIntFTOI :
Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
class ConvertIntITOF :
Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
class UnaryIntNoRetInt :
Intrinsic<[], [llvm_anyint_ty], []>;
class UnaryIntNoRetFloat :
Intrinsic<[], [llvm_anyfloat_ty], []>;
class BinaryIntInt :
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
class BinaryIntFloat :
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
class BinaryIntNoRetInt :
Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
class BinaryIntNoRetFloat :
Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
class TernaryIntInt :
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
class TernaryIntFloat :
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
class QuaternaryIntInt :
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
class UnaryAtomicInt :
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
class BinaryAtomicInt :
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
class TernaryAtomicInt :
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
class UnaryAtomicIntNoRet :
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
class BinaryAtomicIntNoRet :
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
class TernaryAtomicIntNoRet :
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
}
let TargetPrefix = "AMDIL", isTarget = 1 in {
def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
TernaryIntInt;
def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
TernaryIntInt;
def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
UnaryIntInt;
def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
UnaryIntInt;
def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
UnaryIntInt;
def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
UnaryIntInt;
def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
UnaryIntInt;
def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
TernaryIntInt;
def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
TernaryIntInt;
def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
QuaternaryIntInt;
def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
TernaryIntInt;
def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
BinaryIntInt;
def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
TernaryIntInt;
def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
TernaryIntInt;
def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
TernaryIntFloat;
def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
BinaryIntInt;
def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
BinaryIntInt;
def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
BinaryIntInt;
def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
BinaryIntInt;
def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
BinaryIntInt;
def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
BinaryIntInt;
def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
TernaryIntInt;
def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
TernaryIntInt;
def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
BinaryIntInt;
def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
BinaryIntInt;
def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
BinaryIntInt;
def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
BinaryIntInt;
def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
BinaryIntFloat;
def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
BinaryIntInt;
def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
BinaryIntInt;
def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
BinaryIntFloat;
def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
TernaryIntInt;
def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
TernaryIntInt;
def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
TernaryIntInt;
def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
UnaryIntFloat;
def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
TernaryIntFloat;
def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
UnaryIntFloat;
def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
UnaryIntFloat;
def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
UnaryIntFloat;
def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
UnaryIntFloat;
def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
UnaryIntFloat;
def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
UnaryIntFloat;
def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
UnaryIntFloat;
def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
UnaryIntFloat;
def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
UnaryIntFloat;
def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
UnaryIntFloat;
def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
UnaryIntFloat;
def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
UnaryIntFloat;
def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
UnaryIntFloat;
def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
UnaryIntFloat;
def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
UnaryIntFloat;
def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
UnaryIntFloat;
def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
UnaryIntFloat;
def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
UnaryIntFloat;
def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
UnaryIntFloat;
def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
UnaryIntFloat;
def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
TernaryIntFloat;
def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
UnaryIntFloat;
def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
UnaryIntFloat;
def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
UnaryIntFloat;
def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
TernaryIntFloat;
def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_i32_ty], []>;
def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
ConvertIntITOF;
def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
ConvertIntFTOI;
def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
ConvertIntFTOI;
def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
ConvertIntFTOI;
def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
ConvertIntFTOI;
def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
ConvertIntFTOI;
def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
ConvertIntFTOI;
def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
ConvertIntITOF;
def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
ConvertIntITOF;
def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
ConvertIntITOF;
def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
ConvertIntITOF;
def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
llvm_v2f32_ty, llvm_float_ty], []>;
def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
llvm_v2f32_ty], []>;
def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], []>;
def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], []>;
}

View file

@ -1,71 +0,0 @@
//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
#include "AMDILNIDevice.h"
#include "AMDILEvergreenDevice.h"
#include "AMDGPUSubtarget.h"
using namespace llvm;
AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST)
: AMDGPUEvergreenDevice(ST)
{
std::string name = ST->getDeviceName();
if (name == "caicos") {
mDeviceFlag = OCL_DEVICE_CAICOS;
} else if (name == "turks") {
mDeviceFlag = OCL_DEVICE_TURKS;
} else if (name == "cayman") {
mDeviceFlag = OCL_DEVICE_CAYMAN;
} else {
mDeviceFlag = OCL_DEVICE_BARTS;
}
}
AMDGPUNIDevice::~AMDGPUNIDevice()
{
}
size_t
AMDGPUNIDevice::getMaxLDSSize() const
{
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
return MAX_LDS_SIZE_900;
} else {
return 0;
}
}
uint32_t
AMDGPUNIDevice::getGeneration() const
{
return AMDGPUDeviceInfo::HD6XXX;
}
AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST)
: AMDGPUNIDevice(ST)
{
setCaps();
}
AMDGPUCaymanDevice::~AMDGPUCaymanDevice()
{
}
void
AMDGPUCaymanDevice::setCaps()
{
if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
mHWBits.set(AMDGPUDeviceInfo::FMA);
}
mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps);
mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
}

View file

@ -1,59 +0,0 @@
//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// Interface for the subtarget data classes.
//
//===---------------------------------------------------------------------===//
// This file will define the interface that each generation needs to
// implement in order to correctly answer queries on the capabilities of the
// specific hardware.
//===---------------------------------------------------------------------===//
#ifndef _AMDILNIDEVICE_H_
#define _AMDILNIDEVICE_H_
#include "AMDILEvergreenDevice.h"
#include "AMDGPUSubtarget.h"
namespace llvm {
class AMDGPUSubtarget;
//===---------------------------------------------------------------------===//
// NI generation of devices and their respective sub classes
//===---------------------------------------------------------------------===//
// The AMDGPUNIDevice is the base class for all Northern Islands series of
// cards. It is very similar to the AMDGPUEvergreenDevice, with the major
// exception being differences in wavefront size and hardware capabilities. The
// NI devices all use 64-wide wavefronts and also add support for signed 24-bit
// integer operations.
class AMDGPUNIDevice : public AMDGPUEvergreenDevice {
public:
AMDGPUNIDevice(AMDGPUSubtarget*);
virtual ~AMDGPUNIDevice();
virtual size_t getMaxLDSSize() const;
virtual uint32_t getGeneration() const;
protected:
}; // AMDGPUNIDevice
// Just as the AMDGPUCypressDevice is the double-capable version of the
// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double-capable version
// of the AMDGPUNIDevice. The other major difference, less relevant from a
// compiler standpoint, is that the Cayman device has 4-wide ALUs, whereas the
// rest of the NI family is 5-wide.
class AMDGPUCaymanDevice: public AMDGPUNIDevice {
public:
AMDGPUCaymanDevice(AMDGPUSubtarget*);
virtual ~AMDGPUCaymanDevice();
private:
virtual void setCaps();
}; // AMDGPUCaymanDevice
static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800;
} // namespace llvm
#endif // _AMDILNIDEVICE_H_

File diff suppressed because it is too large

View file

@ -1,110 +0,0 @@
//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// Declarations that describe the AMDIL register file
//
//===----------------------------------------------------------------------===//
class AMDILReg<bits<16> num, string n> : Register<n> {
field bits<16> Value;
let Value = num;
let Namespace = "AMDGPU";
}
// We will start with 8 registers for each class before expanding to more
// Since the swizzle is added based on the register class, we can leave it
// off here and just specify different registers for different register classes
def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
// All registers between 1000 and 1024 are reserved and cannot be used
// unless commented in this section
// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local IDs
// r1020 is used to hold the frame index for local arrays
// r1019 is used to hold the dynamic stack allocation pointer
// r1018 is used as a temporary register for handwritten code
// r1017 is used as a temporary register for handwritten code
// r1016 is used as a temporary register for load/store code
// r1015 is used as a temporary register for data segment offset
// r1014 is used as a temporary register for store code
// r1013 is used as the section data pointer register
// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
// r1009 is used as the frame pointer register
// r999 is used as the mem register.
// r998 is used as the return address register.
//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
{
let AltOrders = [(add (sequence "R%u", 1, 20))];
let AltOrderSelect = [{
return 1;
}];
}
def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
{
let AltOrders = [(add (sequence "R%u", 1, 20))];
let AltOrderSelect = [{
return 1;
}];
}
def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
{
let AltOrders = [(add (sequence "R%u", 1, 20))];
let AltOrderSelect = [{
return 1;
}];
}

View file

@ -1,49 +0,0 @@
//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
#include "AMDILSIDevice.h"
#include "AMDILEvergreenDevice.h"
#include "AMDILNIDevice.h"
#include "AMDGPUSubtarget.h"
using namespace llvm;
AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST)
: AMDGPUEvergreenDevice(ST)
{
}
AMDGPUSIDevice::~AMDGPUSIDevice()
{
}
size_t
AMDGPUSIDevice::getMaxLDSSize() const
{
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
return MAX_LDS_SIZE_900;
} else {
return 0;
}
}
uint32_t
AMDGPUSIDevice::getGeneration() const
{
return AMDGPUDeviceInfo::HD7XXX;
}
std::string
AMDGPUSIDevice::getDataLayout() const
{
return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
"-v512:512:512-v1024:1024:1024-v2048:2048:2048"
"-n8:16:32:64");
}

View file

@ -1,45 +0,0 @@
//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// Interface for the subtarget data classes.
//
//===---------------------------------------------------------------------===//
// This file will define the interface that each generation needs to
// implement in order to correctly answer queries on the capabilities of the
// specific hardware.
//===---------------------------------------------------------------------===//
#ifndef _AMDILSIDEVICE_H_
#define _AMDILSIDEVICE_H_
#include "AMDILEvergreenDevice.h"
#include "AMDGPUSubtarget.h"
namespace llvm {
class AMDGPUSubtarget;
//===---------------------------------------------------------------------===//
// SI generation of devices and their respective sub classes
//===---------------------------------------------------------------------===//
// The AMDGPUSIDevice is the base class for all Southern Islands series of
// cards. It is very similar to the AMDGPUEvergreenDevice, with the major
// exception being differences in wavefront size and hardware capabilities. The
// SI devices all use 64-wide wavefronts and also add support for signed 24-bit
// integer operations.
class AMDGPUSIDevice : public AMDGPUEvergreenDevice {
public:
AMDGPUSIDevice(AMDGPUSubtarget*);
virtual ~AMDGPUSIDevice();
virtual size_t getMaxLDSSize() const;
virtual uint32_t getGeneration() const;
virtual std::string getDataLayout() const;
protected:
}; // AMDGPUSIDevice
} // namespace llvm
#endif // _AMDILSIDEVICE_H_

View file

@ -1,75 +0,0 @@
//===-- AMDILUtilityFunctions.h - AMDIL Utility Functions Header --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file provides helper macros for expanding case statements.
//
//===----------------------------------------------------------------------===//
#ifndef AMDILUTILITYFUNCTIONS_H_
#define AMDILUTILITYFUNCTIONS_H_
// Macros that are used to help with switch statements for various data types.
// However, these macros do not return anything, unlike the second set below.
#define ExpandCaseTo32bitIntTypes(Instr) \
case Instr##_i32:
#define ExpandCaseTo32bitIntTruncTypes(Instr) \
case Instr##_i32i8: \
case Instr##_i32i16:
#define ExpandCaseToIntTypes(Instr) \
ExpandCaseTo32bitIntTypes(Instr)
#define ExpandCaseToIntTruncTypes(Instr) \
ExpandCaseTo32bitIntTruncTypes(Instr)
#define ExpandCaseToFloatTypes(Instr) \
case Instr##_f32:
#define ExpandCaseTo32bitScalarTypes(Instr) \
ExpandCaseTo32bitIntTypes(Instr) \
case Instr##_f32:
#define ExpandCaseToAllScalarTypes(Instr) \
ExpandCaseToFloatTypes(Instr) \
ExpandCaseToIntTypes(Instr)
#define ExpandCaseToAllScalarTruncTypes(Instr) \
ExpandCaseToFloatTruncTypes(Instr) \
ExpandCaseToIntTruncTypes(Instr)
#define ExpandCaseToAllTypes(Instr) \
ExpandCaseToAllScalarTypes(Instr)
#define ExpandCaseToAllTruncTypes(Instr) \
ExpandCaseToAllScalarTruncTypes(Instr)
// Macros that expand into statements with return values
#define ExpandCaseTo32bitIntReturn(Instr, Return) \
case Instr##_i32: return Return##_i32;
#define ExpandCaseToIntReturn(Instr, Return) \
ExpandCaseTo32bitIntReturn(Instr, Return)
#define ExpandCaseToFloatReturn(Instr, Return) \
case Instr##_f32: return Return##_f32;
#define ExpandCaseToAllScalarReturn(Instr, Return) \
ExpandCaseToFloatReturn(Instr, Return) \
ExpandCaseToIntReturn(Instr, Return)
// These macros expand to common groupings of RegClass ID's
#define ExpandCaseTo1CompRegID \
case AMDGPU::GPRI32RegClassID: \
case AMDGPU::GPRF32RegClassID:
#define ExpandCaseTo32BitType(Instr) \
case Instr##_i32: \
case Instr##_f32:
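// Illustrative use only (ours, not from the original sources; MOVE_* and
// isMove are hypothetical names): the non-returning macros splice case
// labels into a switch so a single body covers every scalar variant:
//
// static bool isMove(unsigned Opcode) {
//   switch (Opcode) {
//   ExpandCaseToAllScalarTypes(MOVE) // case MOVE_f32: case MOVE_i32:
//     return true;
//   default:
//     return false;
//   }
// }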
#endif // AMDILUTILITYFUNCTIONS_H_

View file

@ -1,34 +0,0 @@
#include "AMDGPUInstPrinter.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;
void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot) {
printInstruction(MI, OS);
printAnnotation(OS, Annot);
}
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
O << getRegisterName(Op.getReg());
} else if (Op.isImm()) {
O << Op.getImm();
} else if (Op.isFPImm()) {
O << Op.getFPImm();
} else {
assert(!"unknown operand type in printOperand");
}
}
void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
printOperand(MI, OpNo, O);
}
#include "AMDGPUGenAsmWriter.inc"

View file

@ -1,34 +0,0 @@
#ifndef AMDGPUINSTPRINTER_H
#define AMDGPUINSTPRINTER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
class AMDGPUInstPrinter : public MCInstPrinter {
public:
AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI) {}
//Autogenerated by tblgen
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
// virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
// void printUnsignedImm(const MCInst *MI, int OpNo, raw_ostream &O);
void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
} // End namespace llvm
#endif // AMDGPUINSTPRINTER_H

View file

@ -1,43 +0,0 @@
==============================================================================
LLVM Release License
==============================================================================
University of Illinois/NCSA
Open Source License
Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign.
All rights reserved.
Developed by:
LLVM Team
University of Illinois at Urbana-Champaign
http://llvm.org
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal with
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimers.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimers in the
documentation and/or other materials provided with the distribution.
* Neither the names of the LLVM Team, University of Illinois at
Urbana-Champaign, nor the names of its contributors may be used to
endorse or promote products derived from this Software without specific
prior written permission.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
SOFTWARE.

View file

@ -1,80 +0,0 @@
//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
namespace {
class AMDGPUMCObjectWriter : public MCObjectWriter {
public:
AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
//XXX: Implement if necessary.
}
virtual void RecordRelocation(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup,
MCValue Target, uint64_t &FixedValue) {
assert(!"Not implemented");
}
virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
};
class AMDGPUAsmBackend : public MCAsmBackend {
public:
AMDGPUAsmBackend(const Target &T)
: MCAsmBackend() {}
virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
virtual unsigned getNumFixupKinds() const { return 0; }
virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const { assert(!"Not implemented"); }
virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCInstFragment *DF,
const MCAsmLayout &Layout) const {
return false;
}
virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
assert(!"Not implemented");
}
virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
};
} //End anonymous namespace
void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
Asm.writeSectionData(I, Layout);
}
}
MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT) {
return new AMDGPUAsmBackend(T);
}
AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter(
raw_ostream &OS) const {
return new AMDGPUMCObjectWriter(OS);
}

View file

@ -1,96 +0,0 @@
//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUMCAsmInfo.h"
using namespace llvm;
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo()
{
HasSingleParameterDotFile = false;
WeakDefDirective = 0;
//===------------------------------------------------------------------===//
HasSubsectionsViaSymbols = true;
HasMachoZeroFillDirective = false;
HasMachoTBSSDirective = false;
HasStaticCtorDtorReferenceInStaticMode = false;
LinkerRequiresNonEmptyDwarfLines = true;
MaxInstLength = 16;
PCSymbol = "$";
SeparatorString = "\n";
CommentColumn = 40;
CommentString = ";";
LabelSuffix = ":";
GlobalPrefix = "@";
PrivateGlobalPrefix = ";.";
LinkerPrivateGlobalPrefix = "!";
InlineAsmStart = ";#ASMSTART";
InlineAsmEnd = ";#ASMEND";
AssemblerDialect = 0;
AllowQuotesInName = false;
AllowNameToStartWithDigit = false;
AllowPeriodsInName = false;
//===--- Data Emission Directives -------------------------------------===//
ZeroDirective = ".zero";
AsciiDirective = ".ascii\t";
AscizDirective = ".asciz\t";
Data8bitsDirective = ".byte\t";
Data16bitsDirective = ".short\t";
Data32bitsDirective = ".long\t";
Data64bitsDirective = ".quad\t";
GPRel32Directive = 0;
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
HasMicrosoftFastStdCallMangling = false;
//===--- Alignment Information ----------------------------------------===//
AlignDirective = ".align\t";
AlignmentIsInBytes = true;
TextAlignFillValue = 0;
//===--- Global Variable Emission Directives --------------------------===//
GlobalDirective = ".global";
ExternDirective = ".extern";
HasSetDirective = false;
HasAggressiveSymbolFolding = true;
LCOMMDirectiveType = LCOMM::None;
COMMDirectiveAlignmentIsInBytes = false;
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
HasSymbolResolver = false;
WeakRefDirective = ".weakref\t";
LinkOnceDirective = 0;
//===--- Dwarf Emission Directives -----------------------------------===//
HasLEB128 = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::None;
DwarfUsesInlineInfoSection = false;
DwarfSectionOffsetDirective = ".offset";
DwarfUsesLabelOffsetForRanges = true;
//===--- CBE Asm Translation Table -----------------------------------===//
AsmTransCBE = 0;
}
const char*
AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const
{
switch (AS) {
default:
return 0;
case 0:
return 0;
};
return 0;
}
const MCSection*
AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const
{
return 0;
}

View file

@ -1,30 +0,0 @@
//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU Assembly Info -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the AMDGPU MC layer assembly info (comment strings,
// data directives, and related assembly properties).
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPUMCASMINFO_H_
#define AMDGPUMCASMINFO_H_
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
class Target;
class StringRef;
class AMDGPUMCAsmInfo : public MCAsmInfo {
public:
explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
const char*
getDataASDirective(unsigned int Size, unsigned int AS) const;
const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
};
} // namespace llvm
#endif // AMDGPUMCASMINFO_H_

View file

@ -1,59 +0,0 @@
//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// CodeEmitter interface for R600 and SI codegen.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPUCODEEMITTER_H
#define AMDGPUCODEEMITTER_H
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
class MCInst;
class MCOperand;
class AMDGPUMCCodeEmitter : public MCCodeEmitter {
public:
uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups) const;
virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const {
return 0;
}
virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
return 0;
}
virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
return 0;
}
virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const {
return Value;
}
virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
return 0;
}
virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
return 0;
}
};
} // End namespace llvm
#endif // AMDGPUCODEEMITTER_H

View file

@ -1,111 +0,0 @@
//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides AMDGPU specific target descriptions.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUMCTargetDesc.h"
#include "AMDGPUMCAsmInfo.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "AMDGPUGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
#include "AMDGPUGenSubtargetInfo.inc"
#define GET_REGINFO_MC_DESC
#include "AMDGPUGenRegisterInfo.inc"
using namespace llvm;
static MCInstrInfo *createAMDGPUMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitAMDGPUMCInstrInfo(X);
return X;
}
static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitAMDGPUMCRegisterInfo(X, 0);
return X;
}
static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
MCSubtargetInfo * X = new MCSubtargetInfo();
InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
return X;
}
static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
return new AMDGPUInstPrinter(MAI, MII, MRI);
}
static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
return createSIMCCodeEmitter(MCII, STI, Ctx);
} else {
return createR600MCCodeEmitter(MCII, STI, Ctx);
}
}
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
raw_ostream &_OS,
MCCodeEmitter *_Emitter,
bool RelaxAll,
bool NoExecStack) {
return createPureStreamer(Ctx, MAB, _OS, _Emitter);
}
extern "C" void LLVMInitializeAMDGPUTargetMC() {
RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
}

View file

@ -1,51 +0,0 @@
//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides AMDGPU specific target descriptions.
//
//===----------------------------------------------------------------------===//
//
#ifndef AMDGPUMCTARGETDESC_H
#define AMDGPUMCTARGETDESC_H
#include "llvm/ADT/StringRef.h"
namespace llvm {
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
extern Target TheAMDGPUTarget;
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT);
} // End llvm namespace
#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
#define GET_INSTRINFO_ENUM
#include "AMDGPUGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
#endif // AMDGPUMCTARGETDESC_H

View file

@ -1,727 +0,0 @@
//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This code emitter outputs bytecode that is understood by the r600g driver
// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
// except that the sizes of the instruction fields are rounded up to the
// nearest byte.
//
// [1] http://www.mesa3d.org/
//
//===----------------------------------------------------------------------===//
#include "R600Defines.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
#include <stdio.h>
#define SRC_BYTE_COUNT 11
#define DST_BYTE_COUNT 5
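// Note (derived from EmitSrc below, not a comment from the original sources):
// the 11 source bytes break down as 2 (source select) + 1 (channel) +
// 1 (negate) + 1 (absolute) + 1 (relative addressing) + 1 (kc_bank) +
// 4 (literal value).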
using namespace llvm;
namespace {
class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
const MCInstrInfo &MCII;
const MCSubtargetInfo &STI;
MCContext &Ctx;
public:
R600MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
MCContext &ctx)
: MCII(mcii), STI(sti), Ctx(ctx) { }
/// EncodeInstruction - Encode the instruction and write it to the OS.
virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const;
/// getMachineOpValue - Return the encoding for an MCOperand.
virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const;
private:
void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const;
void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
raw_ostream &OS) const;
void EmitDst(const MCInst &MI, raw_ostream &OS) const;
void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const;
void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
void EmitByte(unsigned int byte, raw_ostream &OS) const;
void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;
void Emit(uint32_t value, raw_ostream &OS) const;
void Emit(uint64_t value, raw_ostream &OS) const;
unsigned getHWRegIndex(unsigned reg) const;
unsigned getHWRegChan(unsigned reg) const;
unsigned getHWReg(unsigned regNo) const;
bool isFCOp(unsigned opcode) const;
bool isTexOp(unsigned opcode) const;
bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;
/// getHWRegIndexGen - Get the register's hardware index. Implemented in
/// R600HwRegInfo.include.
unsigned getHWRegIndexGen(unsigned int Reg) const;
/// getHWRegChanGen - Get the register's channel. Implemented in
/// R600HwRegInfo.include.
unsigned getHWRegChanGen(unsigned int Reg) const;
};
} // End anonymous namespace
enum RegElement {
ELEMENT_X = 0,
ELEMENT_Y,
ELEMENT_Z,
ELEMENT_W
};
enum InstrTypes {
INSTR_ALU = 0,
INSTR_TEX,
INSTR_FC,
INSTR_NATIVE,
INSTR_VTX
};
enum FCInstr {
FC_IF = 0,
FC_IF_INT,
FC_ELSE,
FC_ENDIF,
FC_BGNLOOP,
FC_ENDLOOP,
FC_BREAK,
FC_BREAK_NZ_INT,
FC_CONTINUE,
FC_BREAK_Z_INT,
FC_BREAK_NZ
};
enum TextureTypes {
TEXTURE_1D = 1,
TEXTURE_2D,
TEXTURE_3D,
TEXTURE_CUBE,
TEXTURE_RECT,
TEXTURE_SHADOW1D,
TEXTURE_SHADOW2D,
TEXTURE_SHADOWRECT,
TEXTURE_1D_ARRAY,
TEXTURE_2D_ARRAY,
TEXTURE_SHADOW1D_ARRAY,
TEXTURE_SHADOW2D_ARRAY
};
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new R600MCCodeEmitter(MCII, STI, Ctx);
}
void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
if (isTexOp(MI.getOpcode())) {
EmitTexInstr(MI, Fixups, OS);
} else if (isFCOp(MI.getOpcode())){
EmitFCInstr(MI, OS);
} else if (MI.getOpcode() == AMDGPU::RETURN ||
MI.getOpcode() == AMDGPU::BUNDLE ||
MI.getOpcode() == AMDGPU::KILL) {
return;
} else {
switch(MI.getOpcode()) {
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
{
uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
EmitByte(INSTR_NATIVE, OS);
Emit(inst, OS);
break;
}
case AMDGPU::CONSTANT_LOAD_eg:
case AMDGPU::VTX_READ_PARAM_i32_eg:
case AMDGPU::VTX_READ_PARAM_f32_eg:
case AMDGPU::VTX_READ_GLOBAL_i8_eg:
case AMDGPU::VTX_READ_GLOBAL_i32_eg:
case AMDGPU::VTX_READ_GLOBAL_f32_eg:
case AMDGPU::VTX_READ_GLOBAL_v4i32_eg:
case AMDGPU::VTX_READ_GLOBAL_v4f32_eg:
{
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
EmitByte(INSTR_VTX, OS);
Emit(InstWord01, OS);
Emit(InstWord2, OS);
break;
}
default:
EmitALUInstr(MI, Fixups, OS);
break;
}
}
}
void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const {
const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
unsigned NumOperands = MI.getNumOperands();
if(MCDesc.findFirstPredOperandIdx() > -1)
NumOperands--;
if (GET_FLAG_OPERAND_IDX(MCDesc.TSFlags) != 0)
NumOperands--;
if(MI.getOpcode() == AMDGPU::PRED_X)
NumOperands = 2;
// XXX Check if instruction writes a result
if (NumOperands < 1) {
return;
}
// Emit instruction type
EmitByte(INSTR_ALU, OS);
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
// Older ALUs have a different encoding for instructions with one or two
// source parameters.
if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
!(MCDesc.TSFlags & R600_InstFlag::OP3)) {
uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
InstWord01 &= ~(0x3FFULL << 39);
InstWord01 |= ISAOpCode << 1;
}
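// In effect (our reading, not a comment from the original sources): the
// 10-bit opcode field read from bits 39-48 is re-inserted one bit higher,
// at bits 40-49, to match the older r600-family ALU encoding.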
unsigned int OpIndex;
for (OpIndex = 1; OpIndex < NumOperands; OpIndex++) {
// Literal constants are always stored as the last operand.
if (MI.getOperand(OpIndex).isImm() || MI.getOperand(OpIndex).isFPImm()) {
break;
}
EmitSrcISA(MI, OpIndex, InstWord01, OS);
}
// Emit zeros for unused sources
for ( ; OpIndex < 4; OpIndex++) {
EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
}
// Emit destination register
const MCOperand &dstOp = MI.getOperand(0);
if (dstOp.isReg() && dstOp.getReg() != AMDGPU::PREDICATE_BIT) {
//element of destination register
InstWord01 |= uint64_t(getHWRegChan(dstOp.getReg())) << 61;
// isClamped
if (isFlagSet(MI, 0, MO_FLAG_CLAMP)) {
InstWord01 |= 1ULL << 63;
}
// write mask
if (!isFlagSet(MI, 0, MO_FLAG_MASK) && NumOperands < 4) {
InstWord01 |= 1ULL << 36;
}
// XXX: Emit relative addressing mode
}
// Emit ALU
// Emit IsLast (for this instruction group) (1 byte)
if (!isFlagSet(MI, 0, MO_FLAG_NOT_LAST)) {
InstWord01 |= 1ULL << 31;
}
// XXX: Emit push modifier
if(isFlagSet(MI, 1, MO_FLAG_PUSH)) {
InstWord01 |= 1ULL << 34;
}
// XXX: Emit predicate (1 byte)
int PredIdx = MCDesc.findFirstPredOperandIdx();
if (PredIdx != -1) {
switch(MI.getOperand(PredIdx).getReg()) {
case AMDGPU::PRED_SEL_ZERO:
InstWord01 |= 2ULL << 29;
break;
case AMDGPU::PRED_SEL_ONE:
InstWord01 |= 3ULL << 29;
break;
}
}
//XXX: predicate
//XXX: bank swizzle
//XXX: OMOD
//XXX: index mode
Emit(InstWord01, OS);
}
void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
raw_ostream &OS) const {
const MCOperand &MO = MI.getOperand(OpIdx);
union {
float f;
uint32_t i;
} Value;
Value.i = 0;
// Emit the source select (2 bytes). For GPRs, this is the register index.
// For other potential instruction operands, (e.g. constant registers) the
// value of the source select is defined in the r600isa docs.
if (MO.isReg()) {
unsigned reg = MO.getReg();
EmitTwoBytes(getHWReg(reg), OS);
if (reg == AMDGPU::ALU_LITERAL_X) {
unsigned ImmOpIndex = MI.getNumOperands() - 1;
MCOperand ImmOp = MI.getOperand(ImmOpIndex);
if (ImmOp.isFPImm()) {
Value.f = ImmOp.getFPImm();
} else {
assert(ImmOp.isImm());
Value.i = ImmOp.getImm();
}
}
} else {
// XXX: Handle other operand types.
EmitTwoBytes(0, OS);
}
// Emit the source channel (1 byte)
if (MO.isReg()) {
EmitByte(getHWRegChan(MO.getReg()), OS);
} else {
EmitByte(0, OS);
}
// XXX: Emit isNegated (1 byte)
if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
&& (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
(MO.isReg() &&
(MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
EmitByte(1, OS);
} else {
EmitByte(0, OS);
}
// Emit isAbsolute (1 byte)
if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
EmitByte(1, OS);
} else {
EmitByte(0, OS);
}
// XXX: Emit relative addressing mode (1 byte)
EmitByte(0, OS);
// Emit kc_bank. This will be adjusted later by r600_asm.
EmitByte(0, OS);
// Emit the literal value, if applicable (4 bytes).
Emit(Value.i, OS);
}
void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
uint64_t &Value, raw_ostream &OS) const {
const MCOperand &MO = MI.getOperand(OpIdx);
union {
float f;
uint32_t i;
} InlineConstant;
InlineConstant.i = 0;
// Emit the source select (2 bytes). For GPRs, this is the register index.
// For other potential instruction operands, (e.g. constant registers) the
// value of the source select is defined in the r600isa docs.
if (MO.isReg()) {
unsigned Reg = MO.getReg();
if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
EmitByte(1, OS);
} else {
EmitByte(0, OS);
}
if (Reg == AMDGPU::ALU_LITERAL_X) {
unsigned ImmOpIndex = MI.getNumOperands() - 1;
MCOperand ImmOp = MI.getOperand(ImmOpIndex);
if (ImmOp.isFPImm()) {
InlineConstant.f = ImmOp.getFPImm();
} else {
assert(ImmOp.isImm());
InlineConstant.i = ImmOp.getImm();
}
}
} else {
// XXX: Handle other operand types.
EmitTwoBytes(0, OS);
}
// source channel
uint64_t sourceChannelValue = getHWRegChan(MO.getReg());
if (OpIdx == 1)
Value |= sourceChannelValue << 10;
if (OpIdx == 2)
Value |= sourceChannelValue << 23;
if (OpIdx == 3)
Value |= sourceChannelValue << 42;
// isNegated
if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
&& (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
(MO.isReg() &&
(MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
if (OpIdx == 1)
Value |= 1ULL << 12;
else if (OpIdx == 2)
Value |= 1ULL << 25;
else if (OpIdx == 3)
Value |= 1ULL << 44;
}
// isAbsolute
if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
assert(OpIdx < 3);
Value |= 1ULL << (32+OpIdx-1);
}
// XXX: relative addressing mode
// XXX: kc_bank
// Emit the literal value, if applicable (4 bytes).
Emit(InlineConstant.i, OS);
}
void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const {
unsigned opcode = MI.getOpcode();
bool hasOffsets = (opcode == AMDGPU::TEX_LD);
unsigned op_offset = hasOffsets ? 3 : 0;
int64_t sampler = MI.getOperand(op_offset+2).getImm();
int64_t textureType = MI.getOperand(op_offset+3).getImm();
unsigned srcSelect[4] = {0, 1, 2, 3};
// Emit instruction type
EmitByte(1, OS);
// Emit instruction
EmitByte(getBinaryCodeForInstr(MI, Fixups), OS);
// XXX: Emit resource id. r600_shader.c uses sampler + 1. Why?
EmitByte(sampler + 1 + 1, OS);
// Emit source register
EmitByte(getHWReg(MI.getOperand(1).getReg()), OS);
// XXX: Emit src isRelativeAddress
EmitByte(0, OS);
// Emit destination register
EmitByte(getHWReg(MI.getOperand(0).getReg()), OS);
// XXX: Emit dst isRelativeAddress
EmitByte(0, OS);
// XXX: Emit dst select
EmitByte(0, OS); // X
EmitByte(1, OS); // Y
EmitByte(2, OS); // Z
EmitByte(3, OS); // W
// XXX: Emit lod bias
EmitByte(0, OS);
// XXX: Emit coord types
unsigned coordType[4] = {1, 1, 1, 1};
if (textureType == TEXTURE_RECT
|| textureType == TEXTURE_SHADOWRECT) {
coordType[ELEMENT_X] = 0;
coordType[ELEMENT_Y] = 0;
}
if (textureType == TEXTURE_1D_ARRAY
|| textureType == TEXTURE_SHADOW1D_ARRAY) {
if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) {
coordType[ELEMENT_Y] = 0;
} else {
coordType[ELEMENT_Z] = 0;
srcSelect[ELEMENT_Z] = ELEMENT_Y;
}
} else if (textureType == TEXTURE_2D_ARRAY
|| textureType == TEXTURE_SHADOW2D_ARRAY) {
coordType[ELEMENT_Z] = 0;
}
for (unsigned i = 0; i < 4; i++) {
EmitByte(coordType[i], OS);
}
// XXX: Emit offsets
if (hasOffsets)
for (unsigned i = 2; i < 5; i++)
EmitByte(MI.getOperand(i).getImm()<<1, OS);
else
EmitNullBytes(3, OS);
// Emit sampler id
EmitByte(sampler, OS);
// XXX:Emit source select
if ((textureType == TEXTURE_SHADOW1D
|| textureType == TEXTURE_SHADOW2D
|| textureType == TEXTURE_SHADOWRECT
|| textureType == TEXTURE_SHADOW1D_ARRAY)
&& opcode != AMDGPU::TEX_SAMPLE_C_L
&& opcode != AMDGPU::TEX_SAMPLE_C_LB) {
srcSelect[ELEMENT_W] = ELEMENT_Z;
}
for (unsigned i = 0; i < 4; i++) {
EmitByte(srcSelect[i], OS);
}
}
void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
// Emit instruction type
EmitByte(INSTR_FC, OS);
// Emit SRC
unsigned NumOperands = MI.getNumOperands();
if (NumOperands > 0) {
assert(NumOperands == 1);
EmitSrc(MI, 0, OS);
} else {
EmitNullBytes(SRC_BYTE_COUNT, OS);
}
// Emit FC Instruction
enum FCInstr instr;
switch (MI.getOpcode()) {
case AMDGPU::BREAK_LOGICALZ_f32:
instr = FC_BREAK;
break;
case AMDGPU::BREAK_LOGICALNZ_f32:
instr = FC_BREAK_NZ;
break;
case AMDGPU::BREAK_LOGICALNZ_i32:
instr = FC_BREAK_NZ_INT;
break;
case AMDGPU::BREAK_LOGICALZ_i32:
instr = FC_BREAK_Z_INT;
break;
case AMDGPU::CONTINUE_LOGICALNZ_f32:
case AMDGPU::CONTINUE_LOGICALNZ_i32:
instr = FC_CONTINUE;
break;
case AMDGPU::IF_LOGICALNZ_f32:
instr = FC_IF;
break;
case AMDGPU::IF_LOGICALNZ_i32:
instr = FC_IF_INT;
break;
case AMDGPU::IF_LOGICALZ_f32:
abort();
break;
case AMDGPU::ELSE:
instr = FC_ELSE;
break;
case AMDGPU::ENDIF:
instr = FC_ENDIF;
break;
case AMDGPU::ENDLOOP:
instr = FC_ENDLOOP;
break;
case AMDGPU::WHILELOOP:
instr = FC_BGNLOOP;
break;
default:
abort();
break;
}
EmitByte(instr, OS);
}
void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
raw_ostream &OS) const {
for (unsigned int i = 0; i < ByteCount; i++) {
EmitByte(0, OS);
}
}
void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
OS.write((uint8_t) Byte & 0xff);
}
void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes,
raw_ostream &OS) const {
OS.write((uint8_t) (Bytes & 0xff));
OS.write((uint8_t) ((Bytes >> 8) & 0xff));
}
void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
for (unsigned i = 0; i < 4; i++) {
OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
}
}
void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
for (unsigned i = 0; i < 8; i++) {
EmitByte((Value >> (8 * i)) & 0xff, OS);
}
}
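// Note (ours, not a comment from the original sources): both Emit overloads
// and EmitTwoBytes write values least-significant byte first, i.e. the
// resulting byte stream is little-endian.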
unsigned R600MCCodeEmitter::getHWRegIndex(unsigned reg) const {
switch(reg) {
case AMDGPU::ZERO: return 248;
case AMDGPU::ONE:
case AMDGPU::NEG_ONE: return 249;
case AMDGPU::ONE_INT: return 250;
case AMDGPU::HALF:
case AMDGPU::NEG_HALF: return 252;
case AMDGPU::ALU_LITERAL_X: return 253;
case AMDGPU::PREDICATE_BIT:
case AMDGPU::PRED_SEL_OFF:
case AMDGPU::PRED_SEL_ZERO:
case AMDGPU::PRED_SEL_ONE:
return 0;
default: return getHWRegIndexGen(reg);
}
}
unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
switch(reg) {
case AMDGPU::ZERO:
case AMDGPU::ONE:
case AMDGPU::ONE_INT:
case AMDGPU::NEG_ONE:
case AMDGPU::HALF:
case AMDGPU::NEG_HALF:
case AMDGPU::ALU_LITERAL_X:
case AMDGPU::PREDICATE_BIT:
case AMDGPU::PRED_SEL_OFF:
case AMDGPU::PRED_SEL_ZERO:
case AMDGPU::PRED_SEL_ONE:
return 0;
default: return getHWRegChanGen(reg);
}
}
unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
unsigned HWReg;
HWReg = getHWRegIndex(RegNo);
if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(RegNo)) {
HWReg += 512;
}
return HWReg;
}
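// Worked example (ours, not from the original sources): a constant register
// whose hardware index is 5 encodes as 5 + 512 = 517, matching the r600 ISA
// convention that the constant-register file starts at source select 512.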
uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixup) const {
if (MO.isReg()) {
return getHWRegIndex(MO.getReg());
} else if (MO.isImm()) {
return MO.getImm();
} else {
assert(0);
return 0;
}
}
//===----------------------------------------------------------------------===//
// Encoding helper functions
//===----------------------------------------------------------------------===//
bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
switch(opcode) {
default: return false;
case AMDGPU::BREAK_LOGICALZ_f32:
case AMDGPU::BREAK_LOGICALNZ_i32:
case AMDGPU::BREAK_LOGICALZ_i32:
case AMDGPU::BREAK_LOGICALNZ_f32:
case AMDGPU::CONTINUE_LOGICALNZ_f32:
case AMDGPU::IF_LOGICALNZ_i32:
case AMDGPU::IF_LOGICALZ_f32:
case AMDGPU::ELSE:
case AMDGPU::ENDIF:
case AMDGPU::ENDLOOP:
case AMDGPU::IF_LOGICALNZ_f32:
case AMDGPU::WHILELOOP:
return true;
}
}
bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
switch(opcode) {
default: return false;
case AMDGPU::TEX_LD:
case AMDGPU::TEX_GET_TEXTURE_RESINFO:
case AMDGPU::TEX_SAMPLE:
case AMDGPU::TEX_SAMPLE_C:
case AMDGPU::TEX_SAMPLE_L:
case AMDGPU::TEX_SAMPLE_C_L:
case AMDGPU::TEX_SAMPLE_LB:
case AMDGPU::TEX_SAMPLE_C_LB:
case AMDGPU::TEX_SAMPLE_G:
case AMDGPU::TEX_SAMPLE_C_G:
case AMDGPU::TEX_GET_GRADIENTS_H:
case AMDGPU::TEX_GET_GRADIENTS_V:
case AMDGPU::TEX_SET_GRADIENTS_H:
case AMDGPU::TEX_SET_GRADIENTS_V:
return true;
}
}
bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
unsigned Flag) const {
const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
if (FlagIndex == 0) {
return false;
}
assert(MI.getOperand(FlagIndex).isImm());
return !!((MI.getOperand(FlagIndex).getImm() >>
(NUM_MO_FLAGS * Operand)) & Flag);
}
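// Worked example (ours, not from the original sources): with NUM_MO_FLAGS = 6,
// the flag immediate packs 6 bits per operand, so checking MO_FLAG_NEG
// (1 << 1) on operand 2 tests bit (6 * 2) + 1 = 13 of the flags operand.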
#define R600RegisterInfo R600MCCodeEmitter
#include "R600HwRegInfo.include"
#undef R600RegisterInfo
#include "AMDGPUGenMCCodeEmitter.inc"

View file

@ -1,296 +0,0 @@
//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The SI code emitter produces machine code that can be executed directly on
// the GPU device.
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
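// Our reading of VGPR_BIT (not a comment from the original sources): VOP
// source fields are 9 bits wide, so VGPR_BIT(1), VGPR_BIT(2), and VGPR_BIT(3)
// select bits 8, 17, and 26 -- the top bit of each source field, which marks
// the operand as a VGPR rather than an SGPR or special value (see
// VOPPostEncode below).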
#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
// These must be kept in sync with SIInstructions.td and also the
// InstrEncodingInfo array in SIInstrInfo.cpp.
//
// NOTE: This enum is only used to identify the encoding type within LLVM,
// the actual encoding type that is part of the instruction format is different
namespace SIInstrEncodingType {
enum Encoding {
EXP = 0,
LDS = 1,
MIMG = 2,
MTBUF = 3,
MUBUF = 4,
SMRD = 5,
SOP1 = 6,
SOP2 = 7,
SOPC = 8,
SOPK = 9,
SOPP = 10,
VINTRP = 11,
VOP1 = 12,
VOP2 = 13,
VOP3 = 14,
VOPC = 15
};
}
using namespace llvm;
namespace {
class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
const MCInstrInfo &MCII;
const MCSubtargetInfo &STI;
MCContext &Ctx;
public:
SIMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
MCContext &ctx)
: MCII(mcii), STI(sti), Ctx(ctx) { }
~SIMCCodeEmitter() { }
/// EncodeInstruction - Encode the instruction and write it to the OS.
virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const;
/// getMachineOpValue - Return the encoding for an MCOperand.
virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const;
public:
/// GPRAlign - Encode a sequence of registers with the correct alignment.
unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
/// GPR2AlignEncode - Encoding for when 2 consecutive registers are used
virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixup) const;
/// GPR4AlignEncode - Encoding for when 4 consecutive registers are used
virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixup) const;
/// SMRDmemriEncode - Encoding for SMRD indexed loads
virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixup) const;
/// VOPPostEncode - Post-Encoder method for VOP instructions
virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const;
private:
/// getEncodingType - Return the SIInstrEncodingType for this instruction.
unsigned getEncodingType(const MCInst &MI) const;
/// getEncodingBytes - Get the size in bytes of this instruction's encoding.
unsigned getEncodingBytes(const MCInst &MI) const;
/// getRegBinaryCode - Returns the hardware encoding for a register
unsigned getRegBinaryCode(unsigned reg) const;
/// getHWRegNum - Generated function that returns the hardware encoding for
/// a register
unsigned getHWRegNum(unsigned reg) const;
};
} // End anonymous namespace
MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new SIMCCodeEmitter(MCII, STI, Ctx);
}
void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
unsigned bytes = getEncodingBytes(MI);
for (unsigned i = 0; i < bytes; i++) {
OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
}
}
uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const {
if (MO.isReg()) {
return getRegBinaryCode(MO.getReg());
} else if (MO.isImm()) {
return MO.getImm();
} else if (MO.isFPImm()) {
// XXX: Not all instructions can use inline literals
// XXX: We should make sure this is a 32-bit constant
union {
float F;
uint32_t I;
} Imm;
Imm.F = MO.getFPImm();
return Imm.I;
} else{
llvm_unreachable("Encoding of this operand type is not supported yet.");
}
return 0;
}
//===----------------------------------------------------------------------===//
// Custom Operand Encodings
//===----------------------------------------------------------------------===//
unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
unsigned shift) const {
unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg());
return regCode >> shift;
}
unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI,
unsigned OpNo ,
SmallVectorImpl<MCFixup> &Fixup) const {
return GPRAlign(MI, OpNo, 1);
}
unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixup) const {
return GPRAlign(MI, OpNo, 2);
}
#define SMRD_OFFSET_MASK 0xff
#define SMRD_IMM_SHIFT 8
#define SMRD_SBASE_MASK 0x3f
#define SMRD_SBASE_SHIFT 9
/// SMRDmemriEncode - This function is responsible for encoding the offset
/// and the base ptr for SMRD instructions. It should return a bit string in
/// this format:
///
/// OFFSET = bits{7-0}
/// IMM = bits{8}
/// SBASE = bits{14-9}
///
uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixup) const {
uint32_t Encoding;
const MCOperand &OffsetOp = MI.getOperand(OpNo + 1);
//XXX: Use this function for SMRD loads with register offsets
assert(OffsetOp.isImm());
Encoding =
(getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK)
| (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
| ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
;
return Encoding;
}
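// Worked example (ours, not from the original sources): an immediate offset
// of 0x10 with an SBASE pair encoding of 4 yields
// (0x10 & 0xff) | (1 << 8) | ((4 & 0x3f) << 9) = 0x0910.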
//===----------------------------------------------------------------------===//
// Post Encoder Callbacks
//===----------------------------------------------------------------------===//
uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const{
unsigned encodingType = getEncodingType(MI);
unsigned numSrcOps;
unsigned vgprBitOffset;
if (encodingType == SIInstrEncodingType::VOP3) {
numSrcOps = 3;
vgprBitOffset = 32;
} else {
numSrcOps = 1;
vgprBitOffset = 0;
}
// Add one to skip over the destination reg operand.
for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
const MCOperand &MO = MI.getOperand(opIdx);
if (MO.isReg()) {
unsigned reg = MI.getOperand(opIdx).getReg();
if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) ||
AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) {
Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
}
} else if (MO.isFPImm()) {
union {
float f;
uint32_t i;
} Imm;
// XXX: Not all instructions can use inline literals
// XXX: We should make sure this is a 32-bit constant
Imm.f = MO.getFPImm();
Value |= ((uint64_t)Imm.i) << 32;
}
}
return Value;
}
//===----------------------------------------------------------------------===//
// Encoding helper functions
//===----------------------------------------------------------------------===//
unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const {
return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
}
unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const {
// These instructions aren't real instructions with an encoding type, so
// we need to manually specify their size.
switch (MI.getOpcode()) {
default: break;
case AMDGPU::SI_LOAD_LITERAL_I32:
case AMDGPU::SI_LOAD_LITERAL_F32:
return 4;
}
unsigned encoding_type = getEncodingType(MI);
switch (encoding_type) {
case SIInstrEncodingType::EXP:
case SIInstrEncodingType::LDS:
case SIInstrEncodingType::MUBUF:
case SIInstrEncodingType::MTBUF:
case SIInstrEncodingType::MIMG:
case SIInstrEncodingType::VOP3:
return 8;
default:
return 4;
}
}
unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const {
switch (reg) {
case AMDGPU::VCC: return 106;
case AMDGPU::M0: return 124;
case AMDGPU::EXEC: return 126;
case AMDGPU::EXEC_LO: return 126;
case AMDGPU::EXEC_HI: return 127;
case AMDGPU::SREG_LIT_0: return 128;
case AMDGPU::SI_LITERAL_CONSTANT: return 255;
default: return getHWRegNum(reg);
}
}
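// Our reading (not a comment from the original sources): the fixed codes
// above are positions in the SI scalar operand encoding space, e.g. 106 for
// VCC and 255 for a literal constant; everything else falls through to the
// generated getHWRegNum table.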
#define SIRegisterInfo SIMCCodeEmitter
#include "SIRegisterGetHWRegNum.inc"
#undef SIRegisterInfo

View file

@ -8,74 +8,8 @@ LIBNAME = radeon
LIBRARY_INCLUDES = -I$(TOP)/include
TBLGEN = $(LLVM_BINDIR)/llvm-tblgen
CXXFLAGS+= $(LLVM_CXXFLAGS)
ifeq ($(LLVM_VERSION),3.1)
CPP_SOURCES += $(LLVM_CPP_SOURCES)
GENERATED_SOURCES = $(LLVM_GENERATED_SOURCES)
else
CXXFLAGS+= -DEXTERNAL_LLVM
endif
include ../../Makefile.template
CXXFLAGS := $(filter-out -DDEBUG, $(CXXFLAGS))
tablegen = $(TBLGEN) -I $(LLVM_INCLUDEDIR) $1 $2 -o $3
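# For illustration (ours, not part of the original Makefile): a rule body like
#   $(call tablegen, -gen-register-info, AMDGPU.td, AMDGPUGenRegisterInfo.inc)
# expands to roughly:
#   $(TBLGEN) -I $(LLVM_INCLUDEDIR) -gen-register-info AMDGPU.td -o AMDGPUGenRegisterInfo.inc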
HAVE_LLVM_INTRINSICS = $(shell grep IntrinsicsR600.td $(LLVM_INCLUDEDIR)/llvm/Intrinsics.td)
SIRegisterInfo.td: SIGenRegisterInfo.pl
$(PERL) $^ > $@
SIRegisterGetHWRegNum.inc: SIGenRegisterInfo.pl
$(PERL) $^ $@ > /dev/null
R600Intrinsics.td: R600IntrinsicsNoOpenCL.td R600IntrinsicsOpenCL.td
ifeq ($(HAVE_LLVM_INTRINSICS),)
cp R600IntrinsicsNoOpenCL.td R600Intrinsics.td
else
cp R600IntrinsicsOpenCL.td R600Intrinsics.td
endif
R600RegisterInfo.td: R600GenRegisterInfo.pl
$(PERL) $^ > $@
AMDGPUGenRegisterInfo.inc: $(TD_FILES)
$(call tablegen, -gen-register-info, AMDGPU.td, $@)
AMDGPUGenInstrInfo.inc: $(TD_FILES)
$(call tablegen, -gen-instr-info, AMDGPU.td, $@)
AMDGPUGenAsmWriter.inc: $(TD_FILES)
$(call tablegen, -gen-asm-writer, AMDGPU.td, $@)
AMDGPUGenDAGISel.inc: $(TD_FILES)
$(call tablegen, -gen-dag-isel, AMDGPU.td, $@)
AMDGPUGenCallingConv.inc: $(TD_FILES)
$(call tablegen, -gen-callingconv, AMDGPU.td, $@)
AMDGPUGenSubtargetInfo.inc: $(TD_FILES)
$(call tablegen, -gen-subtarget, AMDGPU.td, $@)
AMDGPUGenEDInfo.inc: $(TD_FILES)
$(call tablegen, -gen-enhanced-disassembly-info, AMDGPU.td, $@)
AMDGPUGenIntrinsics.inc: $(TD_FILES)
$(call tablegen, -gen-tgt-intrinsic, AMDGPU.td, $@)
AMDGPUGenCodeEmitter.inc: $(TD_FILES)
$(call tablegen, -gen-emitter, AMDGPU.td, $@)
AMDGPUGenMCCodeEmitter.inc: $(TD_FILES)
$(call tablegen, -mc-emitter -gen-emitter, AMDGPU.td, $@)
AMDGPUGenDFAPacketizer.inc: $(TD_FILES)
$(call tablegen, -gen-dfa-packetizer, AMDGPU.td, $@)
LOADER_LIBS=$(shell llvm-config --libs bitreader asmparser)
loader: loader.o libradeon.a
gcc -o loader $(LLVM_LDFLAGS) -L/usr/local/lib $(LDFLAGS) loader.o libradeon.a $(LLVM_LIBS) $(LOADER_LIBS) -lpthread -ldl -lstdc++ -lm

View file

@ -1,86 +1,3 @@
TD_FILES := \
AMDGPU.td \
AMDGPUInstrInfo.td \
AMDGPUInstructions.td \
AMDGPUIntrinsics.td \
AMDGPURegisterInfo.td \
AMDILBase.td \
AMDILInstrInfo.td \
AMDILIntrinsics.td \
AMDILRegisterInfo.td \
Processors.td \
R600Instructions.td \
R600Intrinsics.td \
R600IntrinsicsNoOpenCL.td \
R600IntrinsicsOpenCL.td \
R600RegisterInfo.td \
R600Schedule.td \
SIInstrFormats.td \
SIInstrInfo.td \
SIInstructions.td \
SIIntrinsics.td \
SIRegisterInfo.td \
SISchedule.td
LLVM_GENERATED_SOURCES := \
R600Intrinsics.td \
R600RegisterInfo.td \
SIRegisterInfo.td \
SIRegisterGetHWRegNum.inc \
AMDGPUGenRegisterInfo.inc \
AMDGPUGenInstrInfo.inc \
AMDGPUGenAsmWriter.inc \
AMDGPUGenDAGISel.inc \
AMDGPUGenCallingConv.inc \
AMDGPUGenSubtargetInfo.inc \
AMDGPUGenEDInfo.inc \
AMDGPUGenIntrinsics.inc \
AMDGPUGenCodeEmitter.inc \
AMDGPUGenMCCodeEmitter.inc \
AMDGPUGenDFAPacketizer.inc
LLVM_CPP_SOURCES := \
AMDIL7XXDevice.cpp \
AMDILCFGStructurizer.cpp \
AMDILDevice.cpp \
AMDILDeviceInfo.cpp \
AMDILEvergreenDevice.cpp \
AMDILFrameLowering.cpp \
AMDILIntrinsicInfo.cpp \
AMDILISelDAGToDAG.cpp \
AMDILISelLowering.cpp \
AMDILNIDevice.cpp \
AMDILPeepholeOptimizer.cpp \
AMDILSIDevice.cpp \
AMDGPUAsmPrinter.cpp \
AMDGPUMCInstLower.cpp \
AMDGPUSubtarget.cpp \
AMDGPUTargetMachine.cpp \
AMDGPUISelLowering.cpp \
AMDGPUConvertToISA.cpp \
AMDGPUInstrInfo.cpp \
AMDGPURegisterInfo.cpp \
R600ExpandSpecialInstrs.cpp \
R600ISelLowering.cpp \
R600InstrInfo.cpp \
R600MachineFunctionInfo.cpp \
R600RegisterInfo.cpp \
SIAssignInterpRegs.cpp \
SIInstrInfo.cpp \
SIISelLowering.cpp \
SILowerLiteralConstants.cpp \
SILowerFlowControl.cpp \
SIMachineFunctionInfo.cpp \
SIRegisterInfo.cpp \
InstPrinter/AMDGPUInstPrinter.cpp \
MCTargetDesc/AMDGPUMCAsmInfo.cpp \
MCTargetDesc/AMDGPUAsmBackend.cpp \
MCTargetDesc/AMDGPUMCTargetDesc.cpp \
MCTargetDesc/SIMCCodeEmitter.cpp \
MCTargetDesc/R600MCCodeEmitter.cpp \
TargetInfo/AMDGPUTargetInfo.cpp \
CPP_SOURCES := \
radeon_llvm_emit.cpp

View file

@ -1,29 +0,0 @@
//===-- Processors.td - AMDGPU processor definitions -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// AMDIL processors supported.
//
//===----------------------------------------------------------------------===//
class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
: Processor<Name, itin, Features>;
def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
def : Proc<"rv710", R600_EG_Itin, []>;
def : Proc<"rv730", R600_EG_Itin, []>;
def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;

View file

@ -1,35 +0,0 @@
//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Operand Flags
#define MO_FLAG_CLAMP (1 << 0)
#define MO_FLAG_NEG (1 << 1)
#define MO_FLAG_ABS (1 << 2)
#define MO_FLAG_MASK (1 << 3)
#define MO_FLAG_PUSH (1 << 4)
#define MO_FLAG_NOT_LAST (1 << 5)
#define NUM_MO_FLAGS 6
// Helper for getting the operand index of the instruction
// flags operand.
#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3)
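// Our reading (not a comment from the original sources): bits 7-8 of an
// instruction's TSFlags hold the index of its flag operand, so
// GET_FLAG_OPERAND_IDX returns 0 when the instruction carries no flag
// operand, matching the FlagOperand note in R600_InstFlag below.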
namespace R600_InstFlag {
enum TIF {
TRANS_ONLY = (1 << 0),
TEX = (1 << 1),
REDUCTION = (1 << 2),
FC = (1 << 3),
TRIG = (1 << 4),
OP3 = (1 << 5),
VECTOR = (1 << 6)
//FlagOperand bits 7, 8
};
}

View file

@ -1,292 +0,0 @@
//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Vector, Reduction, and Cube instructions need to fill the entire instruction
// group to work correctly. This pass expands these individual instructions
// into several instructions that will completely fill the instruction group.
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600RegisterInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
namespace {
class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
private:
static char ID;
const R600InstrInfo *TII;
bool ExpandInputPerspective(MachineInstr& MI);
bool ExpandInputConstant(MachineInstr& MI);
public:
R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const {
return "R600 Expand special instructions pass";
}
};
} // End anonymous namespace
char R600ExpandSpecialInstrsPass::ID = 0;
FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
return new R600ExpandSpecialInstrsPass(TM);
}
bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI)
{
const R600RegisterInfo &TRI = TII->getRegisterInfo();
if (MI.getOpcode() != AMDGPU::input_perspective)
return false;
MachineBasicBlock::iterator I = &MI;
unsigned DstReg = MI.getOperand(0).getReg();
R600MachineFunctionInfo *MFI = MI.getParent()->getParent()
->getInfo<R600MachineFunctionInfo>();
unsigned IJIndexBase;
// In the Evergreen ISA doc, section 8.3.2:
// We need to interpolate XY and ZW in two different instruction groups.
// An INTERP_* must occupy all 4 slots of an instruction group.
// Output of INTERP_XY is written in X,Y slots
// Output of INTERP_ZW is written in Z,W slots
//
// Thus interpolation requires the following sequences :
//
// AnyGPR.x = INTERP_ZW; (Write Masked Out)
// AnyGPR.y = INTERP_ZW; (Write Masked Out)
// DstGPR.z = INTERP_ZW;
// DstGPR.w = INTERP_ZW; (End of first IG)
// DstGPR.x = INTERP_XY;
// DstGPR.y = INTERP_XY;
// AnyGPR.z = INTERP_XY; (Write Masked Out)
// AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
//
switch (MI.getOperand(1).getImm()) {
case 0:
IJIndexBase = MFI->GetIJPerspectiveIndex();
break;
case 1:
IJIndexBase = MFI->GetIJLinearIndex();
break;
default:
assert(0 && "Unknow ij index");
}
for (unsigned i = 0; i < 8; i++) {
unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
2 * IJIndexBase + ((i + 1) % 2));
unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
4 * MI.getOperand(2).getImm());
unsigned Sel;
switch (i % 4) {
case 0:Sel = AMDGPU::sel_x;break;
case 1:Sel = AMDGPU::sel_y;break;
case 2:Sel = AMDGPU::sel_z;break;
case 3:Sel = AMDGPU::sel_w;break;
default:break;
}
unsigned Res = TRI.getSubReg(DstReg, Sel);
const MCInstrDesc &Opcode = (i < 4)?
TII->get(AMDGPU::INTERP_ZW):
TII->get(AMDGPU::INTERP_XY);
MachineInstr *NewMI = BuildMI(*(MI.getParent()),
I, MI.getParent()->findDebugLoc(I),
Opcode, Res)
.addReg(IJIndex)
.addReg(ReadReg)
.addImm(0);
if (!(i > 1 && i < 6)) {
TII->addFlag(NewMI, 0, MO_FLAG_MASK);
}
if (i % 4 != 3)
TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
}
MI.eraseFromParent();
return true;
}
bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI)
{
const R600RegisterInfo &TRI = TII->getRegisterInfo();
if (MI.getOpcode() != AMDGPU::input_constant)
return false;
MachineBasicBlock::iterator I = &MI;
unsigned DstReg = MI.getOperand(0).getReg();
for (unsigned i = 0; i < 4; i++) {
unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
4 * MI.getOperand(1).getImm() + i);
unsigned Sel;
switch (i % 4) {
case 0:Sel = AMDGPU::sel_x;break;
case 1:Sel = AMDGPU::sel_y;break;
case 2:Sel = AMDGPU::sel_z;break;
case 3:Sel = AMDGPU::sel_w;break;
default:break;
}
unsigned Res = TRI.getSubReg(DstReg, Sel);
MachineInstr *NewMI = BuildMI(*(MI.getParent()),
I, MI.getParent()->findDebugLoc(I),
TII->get(AMDGPU::INTERP_LOAD_P0), Res)
.addReg(ReadReg)
.addImm(0);
if (i % 4 != 3)
TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
}
MI.eraseFromParent();
return true;
}
bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
MachineBasicBlock::iterator I = MBB.begin();
while (I != MBB.end()) {
MachineInstr &MI = *I;
I = llvm::next(I);
if (ExpandInputPerspective(MI))
continue;
if (ExpandInputConstant(MI))
continue;
bool IsReduction = TII->isReductionOp(MI.getOpcode());
bool IsVector = TII->isVector(MI);
bool IsCube = TII->isCubeOp(MI.getOpcode());
if (!IsReduction && !IsVector && !IsCube) {
continue;
}
// Expand the instruction
//
// Reduction instructions:
// T0_X = DP4 T1_XYZW, T2_XYZW
// becomes:
// T0_X = DP4 T1_X, T2_X
// T0_Y (write masked) = DP4 T1_Y, T2_Y
// T0_Z (write masked) = DP4 T1_Z, T2_Z
// T0_W (write masked) = DP4 T1_W, T2_W
//
// Vector instructions:
// T0_X = MULLO_INT T1_X, T2_X
// becomes:
// T0_X = MULLO_INT T1_X, T2_X
// T0_Y (write masked) = MULLO_INT T1_X, T2_X
// T0_Z (write masked) = MULLO_INT T1_X, T2_X
// T0_W (write masked) = MULLO_INT T1_X, T2_X
//
// Cube instructions:
// T0_XYZW = CUBE T1_XYZW
// becomes:
// T0_X = CUBE T1_Z, T1_Y
// T0_Y = CUBE T1_Z, T1_X
// T0_Z = CUBE T1_X, T1_Z
// T0_W = CUBE T1_Y, T1_Z
for (unsigned Chan = 0; Chan < 4; Chan++) {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned Src0 = MI.getOperand(1).getReg();
unsigned Src1 = 0;
// Determine the correct source registers
if (!IsCube) {
Src1 = MI.getOperand(2).getReg();
}
if (IsReduction) {
unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
Src0 = TRI.getSubReg(Src0, SubRegIndex);
Src1 = TRI.getSubReg(Src1, SubRegIndex);
} else if (IsCube) {
static const int CubeSrcSwz[] = {2, 2, 0, 1};
unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
Src1 = TRI.getSubReg(Src0, SubRegIndex1);
Src0 = TRI.getSubReg(Src0, SubRegIndex0);
}
// Determine the correct destination registers.
unsigned Flags = 0;
if (IsCube) {
unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
DstReg = TRI.getSubReg(DstReg, SubRegIndex);
} else {
// Mask the write if the original instruction does not write to
// the current Channel.
Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
unsigned DstBase = TRI.getHWRegIndex(DstReg);
DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
}
// Set the IsLast bit
Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0);
// Add the new instruction
unsigned Opcode;
if (IsCube) {
switch (MI.getOpcode()) {
case AMDGPU::CUBE_r600_pseudo:
Opcode = AMDGPU::CUBE_r600_real;
break;
case AMDGPU::CUBE_eg_pseudo:
Opcode = AMDGPU::CUBE_eg_real;
break;
default:
assert(!"Unknown CUBE instruction");
Opcode = 0;
break;
}
} else {
Opcode = MI.getOpcode();
}
MachineInstr *NewMI =
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
.addReg(Src0)
.addReg(Src1)
.addImm(0); // Flag
NewMI->setIsInsideBundle(Chan != 0);
TII->addFlag(NewMI, 0, Flags);
}
MI.eraseFromParent();
}
}
return false;
}
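A worked trace of the CUBE operand swizzle above, following CubeSrcSwz = {2, 2, 0, 1} through the two sub-register lookups (illustrative annotation, not part of the diff):

// Chan 0: Src0 = src.Z (CubeSrcSwz[0] = 2), Src1 = src.Y (CubeSrcSwz[3] = 1)
// Chan 1: Src0 = src.Z (CubeSrcSwz[1] = 2), Src1 = src.X (CubeSrcSwz[2] = 0)
// Chan 2: Src0 = src.X (CubeSrcSwz[2] = 0), Src1 = src.Z (CubeSrcSwz[1] = 2)
// Chan 3: Src0 = src.Y (CubeSrcSwz[3] = 1), Src1 = src.Z (CubeSrcSwz[0] = 2)
// This reproduces the "T0_X = CUBE T1_Z, T1_Y" pattern from the comment block.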

View file

@ -1,206 +0,0 @@
#===-- R600GenRegisterInfo.pl - Script for generating register info files --===#
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
#
# This perl script prints to stdout .td code to be used as R600RegisterInfo.td
# it also generates a file called R600HwRegInfo.include, which contains helper
# functions for determining the hw encoding of registers.
#
#===------------------------------------------------------------------------===#
use strict;
use warnings;
use constant CONST_REG_COUNT => 512;
use constant TEMP_REG_COUNT => 128;
my $CREG_MAX = CONST_REG_COUNT - 1;
my $TREG_MAX = TEMP_REG_COUNT - 1;
print <<STRING;
class R600Reg <string name> : Register<name> {
let Namespace = "AMDGPU";
}
class R600Reg_128<string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
}
STRING
my $i;
### REG DEFS ###
my @creg_list = print_reg_defs(CONST_REG_COUNT * 4, "C");
my @treg_list = print_reg_defs(TEMP_REG_COUNT * 4, "T");
my @t128reg;
my @treg_x;
for (my $i = 0; $i < TEMP_REG_COUNT; $i++) {
my $name = "T$i\_XYZW";
print qq{def $name : R600Reg_128 <"T$i.XYZW", [T$i\_X, T$i\_Y, T$i\_Z, T$i\_W] >;\n};
$t128reg[$i] = $name;
$treg_x[$i] = "T$i\_X";
}
my $treg_string = join(",", @treg_list);
my $creg_list = join(",", @creg_list);
my $t128_string = join(",", @t128reg);
my $treg_x_string = join(",", @treg_x);
print <<STRING;
class RegSet <dag s> {
dag set = s;
}
def ZERO : R600Reg<"0.0">;
def HALF : R600Reg<"0.5">;
def ONE : R600Reg<"1.0">;
def ONE_INT : R600Reg<"1">;
def NEG_HALF : R600Reg<"-0.5">;
def NEG_ONE : R600Reg<"-1.0">;
def PV_X : R600Reg<"pv.x">;
def ALU_LITERAL_X : R600Reg<"literal.x">;
def PREDICATE_BIT : R600Reg<"PredicateBit">;
def PRED_SEL_OFF: R600Reg<"Pred_sel_off">;
def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero">;
def PRED_SEL_ONE : R600Reg<"Pred_sel_one">;
def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
$creg_list)>;
def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
$treg_string)>;
def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add
$treg_x_string)>;
def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
R600_TReg32,
R600_CReg32,
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
PREDICATE_BIT)>;
def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add
$t128_string)>
{
let SubRegClasses = [(R600_TReg32 sel_x, sel_y, sel_z, sel_w)];
let CopyCost = -1;
}
STRING
my %index_map;
my %chan_map;
for ($i = 0; $i <= $#creg_list; $i++) {
push(@{$index_map{get_hw_index($i)}}, $creg_list[$i]);
push(@{$chan_map{get_chan_str($i)}}, $creg_list[$i]);
}
for ($i = 0; $i <= $#treg_list; $i++) {
push(@{$index_map{get_hw_index($i)}}, $treg_list[$i]);
push(@{$chan_map{get_chan_str($i)}}, $treg_list[$i]);
}
for ($i = 0; $i <= $#t128reg; $i++) {
push(@{$index_map{$i}}, $t128reg[$i]);
push(@{$chan_map{'X'}}, $t128reg[$i]);
}
open(OUTFILE, ">", "R600HwRegInfo.include") or die "Cannot open R600HwRegInfo.include: $!";
print OUTFILE <<STRING;
unsigned R600RegisterInfo::getHWRegIndexGen(unsigned reg) const
{
switch(reg) {
default: assert(!"Unknown register"); return 0;
STRING
foreach my $key (keys(%index_map)) {
foreach my $reg (@{$index_map{$key}}) {
print OUTFILE " case AMDGPU::$reg:\n";
}
print OUTFILE " return $key;\n\n";
}
print OUTFILE " }\n}\n\n";
print OUTFILE <<STRING;
unsigned R600RegisterInfo::getHWRegChanGen(unsigned reg) const
{
switch(reg) {
default: assert(!"Unknown register"); return 0;
STRING
foreach my $key (keys(%chan_map)) {
foreach my $reg (@{$chan_map{$key}}) {
print OUTFILE " case AMDGPU::$reg:\n";
}
my $val;
if ($key eq 'X') {
$val = 0;
} elsif ($key eq 'Y') {
$val = 1;
} elsif ($key eq 'Z') {
$val = 2;
} elsif ($key eq 'W') {
$val = 3;
} else {
die("Unknown chan value; $key");
}
print OUTFILE " return $val;\n\n";
}
print OUTFILE " }\n}\n\n";
sub print_reg_defs {
my ($count, $prefix) = @_;
my @reg_list;
for ($i = 0; $i < $count; $i++) {
my $hw_index = get_hw_index($i);
my $chan= get_chan_str($i);
my $name = "$prefix$hw_index\_$chan";
print qq{def $name : R600Reg <"$prefix$hw_index.$chan">;\n};
$reg_list[$i] = $name;
}
return @reg_list;
}
#Helper functions
sub get_hw_index {
my ($index) = @_;
return int($index / 4);
}
sub get_chan_str {
my ($index) = @_;
my $chan = $index % 4;
if ($chan == 0 ) {
return 'X';
} elsif ($chan == 1) {
return 'Y';
} elsif ($chan == 2) {
return 'Z';
} elsif ($chan == 3) {
return 'W';
} else {
die("Unknown chan value: $chan");
}
}
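For reference, the first few definitions the script prints to stdout, traced through print_reg_defs and the 128-bit register loop (sample output, not part of the diff):

def C0_X : R600Reg <"C0.X">;
def C0_Y : R600Reg <"C0.Y">;
def C0_Z : R600Reg <"C0.Z">;
def C0_W : R600Reg <"C0.W">;
...
def T0_X : R600Reg <"T0.X">;
...
def T0_XYZW : R600Reg_128 <"T0.XYZW", [T0_X, T0_Y, T0_Z, T0_W] >;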

View file

@ -1,740 +0,0 @@
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
// mostly implements EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//
#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/Argument.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM),
TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
setOperationAction(ISD::MUL, MVT::i64, Expand);
addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
computeRegisterProperties();
setOperationAction(ISD::FADD, MVT::v4f32, Expand);
setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::FSUB, MVT::f32, Expand);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
setOperationAction(ISD::ROTL, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
setTargetDAGCombine(ISD::FP_ROUND);
setSchedulingPreference(Sched::VLIW);
}
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const
{
MachineFunction * MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock::iterator I = *MI;
switch (MI->getOpcode()) {
default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::SHADER_TYPE: break;
case AMDGPU::CLAMP_R600:
{
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addImm(0) // Flags
.addReg(AMDGPU::PRED_SEL_OFF);
TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
break;
}
case AMDGPU::FABS_R600:
{
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addImm(0) // Flags
.addReg(AMDGPU::PRED_SEL_OFF);
TII->addFlag(NewMI, 1, MO_FLAG_ABS);
break;
}
case AMDGPU::FNEG_R600:
{
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addImm(0) // Flags
.addReg(AMDGPU::PRED_SEL_OFF);
TII->addFlag(NewMI, 1, MO_FLAG_NEG);
break;
}
case AMDGPU::R600_LOAD_CONST:
{
int64_t RegIndex = MI->getOperand(1).getImm();
unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
.addOperand(MI->getOperand(0))
.addReg(ConstantReg);
break;
}
case AMDGPU::MASK_WRITE:
{
unsigned maskedRegister = MI->getOperand(0).getReg();
assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
TII->addFlag(defInstr, 0, MO_FLAG_MASK);
// Return early so the instruction is not erased
return BB;
}
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
{
// Convert to DWORD address
unsigned NewAddr = MRI.createVirtualRegister(
&AMDGPU::R600_TReg32_XRegClass);
unsigned ShiftValue = MRI.createVirtualRegister(
&AMDGPU::R600_TReg32RegClass);
unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
// XXX In theory, we should be able to pass ShiftValue directly to
// the LSHR_eg instruction as an inline literal, but I tried doing it
// this way and it didn't produce the correct results.
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV_IMM_I32),
ShiftValue)
.addReg(AMDGPU::ALU_LITERAL_X)
.addReg(AMDGPU::PRED_SEL_OFF)
.addImm(2);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
.addOperand(MI->getOperand(1))
.addReg(ShiftValue)
.addReg(AMDGPU::PRED_SEL_OFF);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
.addOperand(MI->getOperand(0))
.addReg(NewAddr)
.addImm(EOP); // Set End of program bit
break;
}
case AMDGPU::RESERVE_REG:
{
R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
int64_t ReservedIndex = MI->getOperand(0).getImm();
unsigned ReservedReg =
AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
MFI->ReservedRegs.push_back(ReservedReg);
break;
}
case AMDGPU::TXD:
{
unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
.addOperand(MI->getOperand(3))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
.addOperand(MI->getOperand(2))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5))
.addReg(t0, RegState::Implicit)
.addReg(t1, RegState::Implicit);
break;
}
case AMDGPU::TXD_SHADOW:
{
unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
.addOperand(MI->getOperand(3))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
.addOperand(MI->getOperand(2))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5))
.addReg(t0, RegState::Implicit)
.addReg(t1, RegState::Implicit);
break;
}
case AMDGPU::BRANCH:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
.addOperand(MI->getOperand(0))
.addReg(0);
break;
case AMDGPU::BRANCH_COND_f32:
{
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
.addReg(AMDGPU::PREDICATE_BIT)
.addOperand(MI->getOperand(1))
.addImm(OPCODE_IS_NOT_ZERO)
.addImm(0); // Flags
TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
.addOperand(MI->getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
}
case AMDGPU::BRANCH_COND_i32:
{
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
.addReg(AMDGPU::PREDICATE_BIT)
.addOperand(MI->getOperand(1))
.addImm(OPCODE_IS_NOT_ZERO_INT)
.addImm(0); // Flags
TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
.addOperand(MI->getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
}
case AMDGPU::input_perspective:
{
R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
// XXX Be more fine about register reservation
for (unsigned i = 0; i < 4; i ++) {
unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
MFI->ReservedRegs.push_back(ReservedReg);
}
switch (MI->getOperand(1).getImm()) {
case 0:// Perspective
MFI->HasPerspectiveInterpolation = true;
break;
case 1:// Linear
MFI->HasLinearInterpolation = true;
break;
default:
assert(0 && "Unknow ij index");
}
return BB;
}
}
MI->eraseFromParent();
return BB;
}
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::ROTL: return LowerROTL(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntrinsicID) {
case AMDGPUIntrinsic::AMDGPU_store_output: {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
if (!MRI.isLiveOut(Reg)) {
MRI.addLiveOut(Reg);
}
return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
}
// default for switch(IntrinsicID)
default: break;
}
// break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT VT = Op.getValueType();
DebugLoc DL = Op.getDebugLoc();
switch(IntrinsicID) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case AMDGPUIntrinsic::R600_load_input: {
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
}
case AMDGPUIntrinsic::R600_load_input_perspective: {
unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
SDValue FullVector = DAG.getNode(
AMDGPUISD::INTERP,
DL, MVT::v4f32,
DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
}
case AMDGPUIntrinsic::R600_load_input_linear: {
unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
SDValue FullVector = DAG.getNode(
AMDGPUISD::INTERP,
DL, MVT::v4f32,
DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
}
case AMDGPUIntrinsic::R600_load_input_constant: {
unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
SDValue FullVector = DAG.getNode(
AMDGPUISD::INTERP_P0,
DL, MVT::v4f32,
DAG.getConstant(slot / 4 , MVT::i32));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
}
case AMDGPUIntrinsic::R600_load_input_position: {
unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
RegIndex, MVT::f32);
if ((slot % 4) == 3) {
return DAG.getNode(ISD::FDIV,
DL, VT,
DAG.getConstantFP(1.0f, MVT::f32),
Reg);
} else {
return Reg;
}
}
case r600_read_ngroups_x:
return LowerImplicitParameter(DAG, VT, DL, 0);
case r600_read_ngroups_y:
return LowerImplicitParameter(DAG, VT, DL, 1);
case r600_read_ngroups_z:
return LowerImplicitParameter(DAG, VT, DL, 2);
case r600_read_global_size_x:
return LowerImplicitParameter(DAG, VT, DL, 3);
case r600_read_global_size_y:
return LowerImplicitParameter(DAG, VT, DL, 4);
case r600_read_global_size_z:
return LowerImplicitParameter(DAG, VT, DL, 5);
case r600_read_local_size_x:
return LowerImplicitParameter(DAG, VT, DL, 6);
case r600_read_local_size_y:
return LowerImplicitParameter(DAG, VT, DL, 7);
case r600_read_local_size_z:
return LowerImplicitParameter(DAG, VT, DL, 8);
case r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_X, VT);
case r600_read_tgid_y:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_Y, VT);
case r600_read_tgid_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_Z, VT);
case r600_read_tidig_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_X, VT);
case r600_read_tidig_y:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Y, VT);
case r600_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Z, VT);
}
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
break;
}
} // end switch(Op.getOpcode())
return SDValue();
}
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const
{
switch (N->getOpcode()) {
default: return;
case ISD::FP_TO_UINT:
Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
return;
case ISD::INTRINSIC_WO_CHAIN:
{
unsigned IntrinsicID =
cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face) {
Results.push_back(LowerInputFace(N, DAG));
} else {
return;
}
}
}
}
SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const
{
unsigned slot = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
RegIndex, MVT::f32);
return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1,
Reg, DAG.getConstantFP(0.0f, MVT::f32),
DAG.getCondCode(ISD::SETUGT));
}
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const
{
return DAG.getNode(
ISD::SETCC,
Op.getDebugLoc(),
MVT::i1,
Op, DAG.getConstantFP(0.0f, MVT::f32),
DAG.getCondCode(ISD::SETNE)
);
}
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
SDValue Chain = Op.getOperand(0);
SDValue CC = Op.getOperand(1);
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue JumpT = Op.getOperand(4);
SDValue CmpValue;
SDValue Result;
if (LHS.getValueType() == MVT::i32) {
CmpValue = DAG.getNode(
ISD::SELECT_CC,
Op.getDebugLoc(),
MVT::i32,
LHS, RHS,
DAG.getConstant(-1, MVT::i32),
DAG.getConstant(0, MVT::i32),
CC);
} else if (LHS.getValueType() == MVT::f32) {
CmpValue = DAG.getNode(
ISD::SELECT_CC,
Op.getDebugLoc(),
MVT::f32,
LHS, RHS,
DAG.getConstantFP(1.0f, MVT::f32),
DAG.getConstantFP(0.0f, MVT::f32),
CC);
} else {
assert(0 && "Not valid type for br_cc");
}
Result = DAG.getNode(
AMDGPUISD::BRANCH_COND,
CmpValue.getDebugLoc(),
MVT::Other, Chain,
JumpT, CmpValue);
return Result;
}
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
DebugLoc DL,
unsigned DwordOffset) const
{
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::PARAM_I_ADDRESS);
// We shouldn't be using an offset wider than 16 bits for implicit parameters.
assert(isInt<16>(ByteOffset));
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getConstant(ByteOffset, MVT::i32), // PTR
MachinePointerInfo(ConstantPointerNull::get(PtrType)),
false, false, false, 0);
}
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
Op.getOperand(0),
Op.getOperand(0),
DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(32, MVT::i32),
Op.getOperand(1)));
}
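Why LowerROTL subtracts the rotate amount from 32: BITALIGN is treated here as a funnel shift right of the 64-bit concatenation {src0, src1}, so bitalign(x, x, s) equals rotr(x, s), and rotr(x, 32 - n) equals rotl(x, n). A minimal host-side sketch of that equivalence; the exact BITALIGN semantics are an assumption, since the node is not defined in this excerpt:

#include <cstdint>
// Assumed BITALIGN behavior: low 32 bits of ((hi:lo) >> (s & 31)).
static uint32_t bitalign(uint32_t hi, uint32_t lo, unsigned s) {
  uint64_t v = ((uint64_t)hi << 32) | lo;
  return (uint32_t)(v >> (s & 31));
}
// Rotate-left lowered the same way as above: the shift amount is 32 - n.
static uint32_t rotl_via_bitalign(uint32_t x, unsigned n) {
  return bitalign(x, x, (32u - (n & 31u)) & 31u);
}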
bool R600TargetLowering::isZero(SDValue Op) const
{
if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
return Cst->isNullValue();
} else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
return CstFP->isZero();
} else {
return false;
}
}
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue True = Op.getOperand(2);
SDValue False = Op.getOperand(3);
SDValue CC = Op.getOperand(4);
SDValue Temp;
// LHS and RHS are guaranteed to be the same value type
EVT CompareVT = LHS.getValueType();
// We need all the operands of SELECT_CC to have the same value type, so if
// necessary we need to convert LHS and RHS to be the same type True and
// False. True and False are guaranteed to have the same type as this
// SELECT_CC node.
if (isHWTrueValue(True) && isHWFalseValue(False)) {
if (CompareVT != VT) {
if (VT == MVT::f32 && CompareVT == MVT::i32) {
SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
LHS, RHS,
DAG.getConstant(-1, MVT::i32),
DAG.getConstant(0, MVT::i32),
CC);
return DAG.getNode(ISD::UINT_TO_FP, DL, VT, Boolean);
} else if (VT == MVT::i32 && CompareVT == MVT::f32) {
SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
LHS, RHS,
DAG.getConstantFP(1.0f, MVT::f32),
DAG.getConstantFP(0.0f, MVT::f32),
CC);
return DAG.getNode(ISD::FP_TO_UINT, DL, VT, BoolAsFlt);
} else {
// I don't think there will be any other type pairings.
assert(!"Unhandled operand type parings in SELECT_CC");
}
} else {
return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
}
}
// XXX If True is a hardware TRUE value and False is a hardware FALSE value,
// we can handle this with a native instruction, but we need to swap true
// and false and change the conditional.
if (isHWTrueValue(False) && isHWFalseValue(True)) {
}
// Check if we can lower this to a native operation.
// CND* instructions require all operands to have the same type,
// and RHS to be zero.
if (isZero(LHS) || isZero(RHS)) {
SDValue Cond = (isZero(LHS) ? RHS : LHS);
SDValue Zero = (isZero(LHS) ? LHS : RHS);
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
if (CompareVT != VT) {
True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
}
if (isZero(LHS)) {
CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
}
switch (CCOpcode) {
case ISD::SETONE:
case ISD::SETUNE:
case ISD::SETNE:
case ISD::SETULE:
case ISD::SETULT:
case ISD::SETOLE:
case ISD::SETOLT:
case ISD::SETLE:
case ISD::SETLT:
CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Temp = True;
True = False;
False = Temp;
break;
default:
break;
}
SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
Cond, Zero,
True, False,
DAG.getCondCode(CCOpcode));
return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
}
// If we make it this far, it means we have no native instructions to handle
// this SELECT_CC, so we must lower it.
SDValue HWTrue, HWFalse;
if (CompareVT == MVT::f32) {
HWTrue = DAG.getConstantFP(1.0f, CompareVT);
HWFalse = DAG.getConstantFP(0.0f, CompareVT);
} else if (CompareVT == MVT::i32) {
HWTrue = DAG.getConstant(-1, CompareVT);
HWFalse = DAG.getConstant(0, CompareVT);
} else {
assert(!"Unhandled value type in LowerSELECT_CC");
}
// Lower this unsupported SELECT_CC into a combination of two supported
// SELECT_CC operations.
SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
return DAG.getNode(ISD::SELECT_CC, DL, VT,
Cond, HWFalse,
True, False,
DAG.getCondCode(ISD::SETNE));
}
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
SDValue Cond;
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
DebugLoc DL = Op.getDebugLoc();
assert(Op.getValueType() == MVT::i32);
if (LHS.getValueType() == MVT::i32) {
Cond = DAG.getNode(
ISD::SELECT_CC,
Op.getDebugLoc(),
MVT::i32,
LHS, RHS,
DAG.getConstant(-1, MVT::i32),
DAG.getConstant(0, MVT::i32),
CC);
} else if (LHS.getValueType() == MVT::f32) {
Cond = DAG.getNode(
ISD::SELECT_CC,
Op.getDebugLoc(),
MVT::f32,
LHS, RHS,
DAG.getConstantFP(1.0f, MVT::f32),
DAG.getConstantFP(0.0f, MVT::f32),
CC);
Cond = DAG.getNode(
ISD::FP_TO_SINT,
DL,
MVT::i32,
Cond);
} else {
assert(0 && "Not valid type for set_cc");
}
Cond = DAG.getNode(
ISD::AND,
DL,
MVT::i32,
DAG.getConstant(1, MVT::i32),
Cond);
return Cond;
}
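The closing AND with 1 is needed because the hardware-style SELECT_CC used above produces all-ones for a true integer compare, while ISD::SETCC must yield 0 or 1. A tiny host-side model of the i32 path (hypothetical helper, for illustration only):

#include <cstdint>
static int32_t setcc_i32_model(int32_t lhs, int32_t rhs) {
  int32_t hw = (lhs == rhs) ? -1 : 0; // what the SELECT_CC node computes for SETE
  return hw & 1;                      // what the final AND node normalizes it to
}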
// XXX Only kernel functions are supported, so we can assume for now that
// every function is a kernel function, but in the future we should use
// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const
{
unsigned ParamOffsetBytes = 36;
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
EVT VT = Ins[i].VT;
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::PARAM_I_ADDRESS);
SDValue Arg = DAG.getLoad(VT, DL, DAG.getRoot(),
DAG.getConstant(ParamOffsetBytes, MVT::i32),
MachinePointerInfo(new Argument(PtrTy)),
false, false, false, 4);
InVals.push_back(Arg);
ParamOffsetBytes += (VT.getStoreSize());
}
return Chain;
}
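A note on the starting offset of 36: it matches the implicit-parameter layout documented in R600ISelLowering.h further down, though the connection is an inference rather than something the code states:

// Nine implicit dwords precede the explicit kernel arguments:
//   ParamOffsetBytes = 9 dwords * 4 bytes/dword = 36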
//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const
{
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
// (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
case ISD::FP_ROUND: {
SDValue Arg = N->getOperand(0);
if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
Arg.getOperand(0));
}
break;
}
}
return SDValue();
}

View file

@ -1,69 +0,0 @@
//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 DAG Lowering interface definition
//
//===----------------------------------------------------------------------===//
#ifndef R600ISELLOWERING_H
#define R600ISELLOWERING_H
#include "AMDGPUISelLowering.h"
namespace llvm {
class R600InstrInfo;
class R600TargetLowering : public AMDGPUTargetLowering
{
public:
R600TargetLowering(TargetMachine &TM);
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock * BB) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
void ReplaceNodeResults(SDNode * N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
virtual SDValue LowerFormalArguments(
SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
private:
const R600InstrInfo * TII;
/// LowerImplicitParameter - Each OpenCL kernel has nine implicit parameters
/// that are stored in the first nine dwords of a Vertex Buffer. These
/// implicit parameters are lowered to load instructions which retrieve the
/// values from the Vertex Buffer.
SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
DebugLoc DL, unsigned DwordOffset) const;
void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
MachineRegisterInfo & MRI, unsigned dword_offset) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
/// LowerROTL - Lower ROTL opcode to BITALIGN
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
bool isZero(SDValue Op) const;
};
} // End namespace llvm
#endif // R600ISELLOWERING_H

View file

@ -1,512 +0,0 @@
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//
#include "R600InstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "AMDILUtilityFunctions.h"
#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"
using namespace llvm;
R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
: AMDGPUInstrInfo(tm),
RI(tm, *this),
TM(tm)
{ }
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
{
return RI;
}
bool R600InstrInfo::isTrig(const MachineInstr &MI) const
{
return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}
bool R600InstrInfo::isVector(const MachineInstr &MI) const
{
return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}
void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const
{
if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
&& AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
for (unsigned I = 0; I < 4; I++) {
unsigned SubRegIndex = RI.getSubRegFromChannel(I);
BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
.addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
.addReg(RI.getSubReg(SrcReg, SubRegIndex))
.addImm(0) // Flag
.addReg(0) // PREDICATE_BIT
.addReg(DestReg, RegState::Define | RegState::Implicit);
}
} else {
/* We can't copy vec4 registers */
assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
&& !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(0) // Flag
.addReg(0); // PREDICATE_BIT
}
}
MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
unsigned DstReg, int64_t Imm) const
{
MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
MachineInstrBuilder(MI).addReg(DstReg, RegState::Define)
.addReg(AMDGPU::ALU_LITERAL_X)
.addImm(Imm)
.addReg(0); // PREDICATE_BIT
return MI;
}
unsigned R600InstrInfo::getIEQOpcode() const
{
return AMDGPU::SETE_INT;
}
bool R600InstrInfo::isMov(unsigned Opcode) const
{
switch(Opcode) {
default: return false;
case AMDGPU::MOV:
case AMDGPU::MOV_IMM_F32:
case AMDGPU::MOV_IMM_I32:
return true;
}
}
// Some instructions act as placeholders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const
{
switch (Opcode) {
default: return false;
case AMDGPU::RETURN:
case AMDGPU::MASK_WRITE:
case AMDGPU::RESERVE_REG:
return true;
}
}
bool R600InstrInfo::isReductionOp(unsigned Opcode) const
{
switch(Opcode) {
default: return false;
case AMDGPU::DOT4_r600:
case AMDGPU::DOT4_eg:
return true;
}
}
bool R600InstrInfo::isCubeOp(unsigned Opcode) const
{
switch(Opcode) {
default: return false;
case AMDGPU::CUBE_r600_pseudo:
case AMDGPU::CUBE_r600_real:
case AMDGPU::CUBE_eg_pseudo:
case AMDGPU::CUBE_eg_real:
return true;
}
}
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const
{
const InstrItineraryData *II = TM->getInstrItineraryData();
return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}
static bool
isPredicateSetter(unsigned Opcode)
{
switch (Opcode) {
case AMDGPU::PRED_X:
return true;
default:
return false;
}
}
static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I)
{
while (I != MBB.begin()) {
--I;
MachineInstr *MI = I;
if (isPredicateSetter(MI->getOpcode()))
return MI;
}
return NULL;
}
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const
{
// Most of the following comes from the ARM implementation of AnalyzeBranch
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin())
return false;
--I;
while (I->isDebugValue()) {
if (I == MBB.begin())
return false;
--I;
}
if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
return false;
}
// Get the last instruction in the block.
MachineInstr *LastInst = I;
// If there is only one terminator instruction, process it.
unsigned LastOpc = LastInst->getOpcode();
if (I == MBB.begin() ||
static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
if (LastOpc == AMDGPU::JUMP) {
if(!isPredicated(LastInst)) {
TBB = LastInst->getOperand(0).getMBB();
return false;
} else {
MachineInstr *predSet = I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
}
TBB = LastInst->getOperand(0).getMBB();
Cond.push_back(predSet->getOperand(1));
Cond.push_back(predSet->getOperand(2));
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
return false;
}
}
return true; // Can't handle indirect branch.
}
// Get the instruction before it if it is a terminator.
MachineInstr *SecondLastInst = I;
unsigned SecondLastOpc = SecondLastInst->getOpcode();
// If the block ends with a predicated JUMP followed by an unconditional JUMP, handle it.
if (SecondLastOpc == AMDGPU::JUMP &&
isPredicated(SecondLastInst) &&
LastOpc == AMDGPU::JUMP &&
!isPredicated(LastInst)) {
MachineInstr *predSet = --I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
}
TBB = SecondLastInst->getOperand(0).getMBB();
FBB = LastInst->getOperand(0).getMBB();
Cond.push_back(predSet->getOperand(1));
Cond.push_back(predSet->getOperand(2));
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
return false;
}
// Otherwise, can't handle this.
return true;
}
int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
const MachineInstr *MI = op.getParent();
switch (MI->getDesc().OpInfo->RegClass) {
default: // FIXME: fallthrough??
case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
}
}
unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const
{
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
if (FBB == 0) {
if (Cond.empty()) {
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
return 1;
} else {
MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
assert(PredSet && "No previous predicate!");
addFlag(PredSet, 1, MO_FLAG_PUSH);
PredSet->getOperand(2).setImm(Cond[1].getImm());
BuildMI(&MBB, DL, get(AMDGPU::JUMP))
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
return 1;
}
} else {
MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
assert(PredSet && "No previous predicate!");
addFlag(PredSet, 1, MO_FLAG_PUSH);
PredSet->getOperand(2).setImm(Cond[1].getImm());
BuildMI(&MBB, DL, get(AMDGPU::JUMP))
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
return 2;
}
}
unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
{
// Note: we leave PRED* instructions there.
// They may be needed when predicating instructions.
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) {
return 0;
}
--I;
switch (I->getOpcode()) {
default:
return 0;
case AMDGPU::JUMP:
if (isPredicated(I)) {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 1, MO_FLAG_PUSH);
}
I->eraseFromParent();
break;
}
I = MBB.end();
if (I == MBB.begin()) {
return 1;
}
--I;
switch (I->getOpcode()) {
// FIXME: only one case??
default:
return 1;
case AMDGPU::JUMP:
if (isPredicated(I)) {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 1, MO_FLAG_PUSH);
}
I->eraseFromParent();
break;
}
return 2;
}
bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const
{
int idx = MI->findFirstPredOperandIdx();
if (idx < 0)
return false;
unsigned Reg = MI->getOperand(idx).getReg();
switch (Reg) {
default: return false;
case AMDGPU::PRED_SEL_ONE:
case AMDGPU::PRED_SEL_ZERO:
case AMDGPU::PREDICATE_BIT:
return true;
}
}
bool
R600InstrInfo::isPredicable(MachineInstr *MI) const
{
return AMDGPUInstrInfo::isPredicable(MI);
}
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCyles,
unsigned ExtraPredCycles,
const BranchProbability &Probability) const{
return true;
}
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumTCycles,
unsigned ExtraTCycles,
MachineBasicBlock &FMBB,
unsigned NumFCycles,
unsigned ExtraFCycles,
const BranchProbability &Probability) const
{
return true;
}
bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
unsigned NumCyles,
const BranchProbability &Probability)
const
{
return true;
}
bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB) const
{
return false;
}
bool
R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
{
MachineOperand &MO = Cond[1];
switch (MO.getImm()) {
case OPCODE_IS_ZERO_INT:
MO.setImm(OPCODE_IS_NOT_ZERO_INT);
break;
case OPCODE_IS_NOT_ZERO_INT:
MO.setImm(OPCODE_IS_ZERO_INT);
break;
case OPCODE_IS_ZERO:
MO.setImm(OPCODE_IS_NOT_ZERO);
break;
case OPCODE_IS_NOT_ZERO:
MO.setImm(OPCODE_IS_ZERO);
break;
default:
return true;
}
MachineOperand &MO2 = Cond[2];
switch (MO2.getReg()) {
case AMDGPU::PRED_SEL_ZERO:
MO2.setReg(AMDGPU::PRED_SEL_ONE);
break;
case AMDGPU::PRED_SEL_ONE:
MO2.setReg(AMDGPU::PRED_SEL_ZERO);
break;
default:
return true;
}
return false;
}
bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const
{
return isPredicateSetter(MI->getOpcode());
}
bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2) const
{
return false;
}
bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const
{
int PIdx = MI->findFirstPredOperandIdx();
if (PIdx != -1) {
MachineOperand &PMO = MI->getOperand(PIdx);
PMO.setReg(Pred[2].getReg());
MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
return true;
}
return false;
}
int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const
{
if (PredCost)
*PredCost = 2;
return 2;
}
//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//
bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const
{
return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
}
MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI) const
{
unsigned FlagIndex = GET_FLAG_OPERAND_IDX(get(MI->getOpcode()).TSFlags);
assert(FlagIndex != 0 &&
"Instruction flags not supported for this instruction");
MachineOperand &FlagOp = MI->getOperand(FlagIndex);
assert(FlagOp.isImm());
return FlagOp;
}
void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
unsigned Flag) const
{
MachineOperand &FlagOp = getFlagOp(MI);
FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
}
void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
unsigned Flag) const
{
MachineOperand &FlagOp = getFlagOp(MI);
unsigned InstFlags = FlagOp.getImm();
InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
FlagOp.setImm(InstFlags);
}
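addFlag and clearFlag pack one group of MO_FLAG_* bits per machine operand, each group NUM_MO_FLAGS bits wide; that constant lives in R600Defines.h, which is not part of this excerpt. A minimal standalone sketch of the same packing, with the group width assumed to be 3 bits purely for illustration:

#include <cstdint>
constexpr unsigned NUM_MO_FLAGS = 3; // assumed width; the real value is in R600Defines.h
static uint64_t addOperandFlag(uint64_t Imm, unsigned Operand, uint64_t Flag) {
  return Imm | (Flag << (NUM_MO_FLAGS * Operand));  // mirrors R600InstrInfo::addFlag
}
static uint64_t clearOperandFlag(uint64_t Imm, unsigned Operand, uint64_t Flag) {
  return Imm & ~(Flag << (NUM_MO_FLAGS * Operand)); // mirrors R600InstrInfo::clearFlag
}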

View file

@ -1,132 +0,0 @@
//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Interface definition for R600InstrInfo
//
//===----------------------------------------------------------------------===//
#ifndef R600INSTRINFO_H_
#define R600INSTRINFO_H_
#include "AMDIL.h"
#include "AMDGPUInstrInfo.h"
#include "R600RegisterInfo.h"
#include <map>
namespace llvm {
class AMDGPUTargetMachine;
class DFAPacketizer;
class ScheduleDAG;
class MachineFunction;
class MachineInstr;
class MachineInstrBuilder;
class R600InstrInfo : public AMDGPUInstrInfo {
private:
const R600RegisterInfo RI;
AMDGPUTargetMachine &TM;
int getBranchInstr(const MachineOperand &op) const;
public:
explicit R600InstrInfo(AMDGPUTargetMachine &tm);
const R600RegisterInfo &getRegisterInfo() const;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
bool isTrig(const MachineInstr &MI) const;
bool isPlaceHolderOpcode(unsigned opcode) const;
bool isReductionOp(unsigned opcode) const;
bool isCubeOp(unsigned opcode) const;
/// isVector - Vector instructions are instructions that must fill all
/// instruction slots within an instruction group.
bool isVector(const MachineInstr &MI) const;
virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
int64_t Imm) const;
virtual unsigned getIEQOpcode() const;
virtual bool isMov(unsigned Opcode) const;
DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
unsigned RemoveBranch(MachineBasicBlock &MBB) const;
bool isPredicated(const MachineInstr *MI) const;
bool isPredicable(MachineInstr *MI) const;
bool
isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
const BranchProbability &Probability) const;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
unsigned ExtraPredCycles,
const BranchProbability &Probability) const ;
bool
isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumTCycles, unsigned ExtraTCycles,
MachineBasicBlock &FMBB,
unsigned NumFCycles, unsigned ExtraFCycles,
const BranchProbability &Probability) const;
bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const;
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2) const;
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB) const;
bool PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const;
int getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost = 0) const;
virtual int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const { return 1;}
/// hasFlagOperand - Returns true if this instruction has an operand for
/// storing target flags.
bool hasFlagOperand(const MachineInstr &MI) const;
/// addFlag - Add one of the MO_FLAG* flags to the specified Operand.
void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
/// isFlagSet - Determine if the specified flag is set on this Operand.
bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;
/// getFlagOp - Return the operand containing the flags for this instruction.
MachineOperand &getFlagOp(MachineInstr *MI) const;
/// clearFlag - Clear the specified flag on the instruction.
void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
};
} // End llvm namespace
#endif // R600INSTRINFO_H_

File diff suppressed because it is too large

View file

@ -1,50 +0,0 @@
//===-- R600Intrinsics.td - R600 Intrinsic defs --------*- tablegen -*-----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Intrinsic Definitions
//
//===----------------------------------------------------------------------===//
let TargetPrefix = "R600", isTarget = 1 in {
def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_R600_load_input_perspective :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_constant :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_linear :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_position :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_face :
Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
}
let TargetPrefix = "r600", isTarget = 1 in {
class R600ReadPreloadRegisterIntrinsic<string name>
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<name>;
multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
}
defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_global_size">;
defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_local_size">;
defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_ngroups">;
defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_tgid">;
defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_tidig">;
} // End TargetPrefix = "r600"

View file

@ -1,26 +0,0 @@
//===-- R600Intrinsics.td - R600 Intrinsic defs --------*- tablegen -*-----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Intrinsic Definitions
//
//===----------------------------------------------------------------------===//
let TargetPrefix = "R600", isTarget = 1 in {
def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_R600_load_input_perspective :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_constant :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_linear :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_position :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_face :
Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
}

View file

@ -1,33 +0,0 @@
//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "R600MachineFunctionInfo.h"
using namespace llvm;
R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
: MachineFunctionInfo(),
HasLinearInterpolation(false),
HasPerspectiveInterpolation(false)
{ }
unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const
{
assert(HasPerspectiveInterpolation);
return 0;
}
unsigned R600MachineFunctionInfo::GetIJLinearIndex() const
{
assert(HasLinearInterpolation);
if (HasPerspectiveInterpolation)
return 1;
else
return 0;
}
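The IJ register index assignment implied by the two getters, tabulated (derived from the code above; illustrative only):

// Interpolation modes used   Perspective index   Linear index
// perspective only           0                   -
// linear only                -                   0
// both                       0                   1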

View file

@ -1,38 +0,0 @@
//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600MachineFunctionInfo is used for keeping track of which registers have
// been reserved by the llvm.AMDGPU.reserve.reg intrinsic.
//
//===----------------------------------------------------------------------===//
#ifndef R600MACHINEFUNCTIONINFO_H
#define R600MACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFunction.h"
#include <vector>
namespace llvm {
class R600MachineFunctionInfo : public MachineFunctionInfo {
public:
R600MachineFunctionInfo(const MachineFunction &MF);
std::vector<unsigned> ReservedRegs;
bool HasLinearInterpolation;
bool HasPerspectiveInterpolation;
unsigned GetIJLinearIndex() const;
unsigned GetIJPerspectiveIndex() const;
};
} // End llvm namespace
#endif //R600MACHINEFUNCTIONINFO_H

View file

@ -1,128 +0,0 @@
//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The file contains the R600 implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
#include "R600RegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "R600MachineFunctionInfo.h"
using namespace llvm;
R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
const TargetInstrInfo &tii)
: AMDGPURegisterInfo(tm, tii),
TM(tm),
TII(tii)
{ }
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
{
BitVector Reserved(getNumRegs());
const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
Reserved.set(AMDGPU::ONE);
Reserved.set(AMDGPU::ONE_INT);
Reserved.set(AMDGPU::NEG_HALF);
Reserved.set(AMDGPU::NEG_ONE);
Reserved.set(AMDGPU::PV_X);
Reserved.set(AMDGPU::ALU_LITERAL_X);
Reserved.set(AMDGPU::PREDICATE_BIT);
Reserved.set(AMDGPU::PRED_SEL_OFF);
Reserved.set(AMDGPU::PRED_SEL_ZERO);
Reserved.set(AMDGPU::PRED_SEL_ONE);
for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
Reserved.set(*I);
}
for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
E = MFI->ReservedRegs.end(); I != E; ++I) {
Reserved.set(*I);
Reserved.set(*(getSuperRegisters(*I)));
}
return Reserved;
}
const TargetRegisterClass *
R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
{
switch (rc->getID()) {
case AMDGPU::GPRF32RegClassID:
case AMDGPU::GPRI32RegClassID:
return &AMDGPU::R600_Reg32RegClass;
default: return rc;
}
}
unsigned R600RegisterInfo::getHWRegIndex(unsigned reg) const
{
switch(reg) {
case AMDGPU::ZERO: return 248;
case AMDGPU::ONE:
case AMDGPU::NEG_ONE: return 249;
case AMDGPU::ONE_INT: return 250;
case AMDGPU::HALF:
case AMDGPU::NEG_HALF: return 252;
case AMDGPU::ALU_LITERAL_X: return 253;
case AMDGPU::PREDICATE_BIT:
case AMDGPU::PRED_SEL_OFF:
case AMDGPU::PRED_SEL_ZERO:
case AMDGPU::PRED_SEL_ONE:
return 0;
default: return getHWRegIndexGen(reg);
}
}
unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const
{
switch(reg) {
case AMDGPU::ZERO:
case AMDGPU::ONE:
case AMDGPU::ONE_INT:
case AMDGPU::NEG_ONE:
case AMDGPU::HALF:
case AMDGPU::NEG_HALF:
case AMDGPU::ALU_LITERAL_X:
case AMDGPU::PREDICATE_BIT:
case AMDGPU::PRED_SEL_OFF:
case AMDGPU::PRED_SEL_ZERO:
case AMDGPU::PRED_SEL_ONE:
return 0;
default: return getHWRegChanGen(reg);
}
}
const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
MVT VT) const
{
switch(VT.SimpleTy) {
default:
case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
}
}
unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const
{
switch (Channel) {
default: assert(!"Invalid channel index"); return 0;
case 0: return AMDGPU::sel_x;
case 1: return AMDGPU::sel_y;
case 2: return AMDGPU::sel_z;
case 3: return AMDGPU::sel_w;
}
}
#include "R600HwRegInfo.include"

View file

@ -1,63 +0,0 @@
//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Interface definition for R600RegisterInfo
//
//===----------------------------------------------------------------------===//
#ifndef R600REGISTERINFO_H_
#define R600REGISTERINFO_H_
#include "AMDGPUTargetMachine.h"
#include "AMDGPURegisterInfo.h"
namespace llvm {
class R600TargetMachine;
class TargetInstrInfo;
struct R600RegisterInfo : public AMDGPURegisterInfo
{
AMDGPUTargetMachine &TM;
const TargetInstrInfo &TII;
R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
/// getISARegClass - rc is an AMDIL reg class. This function returns the
/// R600 reg class that is equivalent to the given AMDIL reg class.
virtual const TargetRegisterClass * getISARegClass(
const TargetRegisterClass * rc) const;
/// getHWRegIndex - get the HW encoding for a register.
unsigned getHWRegIndex(unsigned reg) const;
/// getHWRegChan - get the HW encoding for a register's channel.
unsigned getHWRegChan(unsigned reg) const;
/// getCFGStructurizerRegClass - get the register class of the specified
/// type to use in the CFGStructurizer
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
/// getSubRegFromChannel - Return the sub reg enum value for the given
/// Channel (e.g. getSubRegFromChannel(0) -> AMDGPU::sel_x)
unsigned getSubRegFromChannel(unsigned Channel) const;
private:
/// getHWRegIndexGen - Generated function returns a register's encoding
unsigned getHWRegIndexGen(unsigned reg) const;
/// getHWRegChanGen - Generated function returns a register's channel
/// encoding.
unsigned getHWRegChanGen(unsigned reg) const;
};
} // End namespace llvm
#endif // R600REGISTERINFO_H_

View file

@ -1,36 +0,0 @@
//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 has a VLIW architecture. On pre-Cayman cards there are 5 instruction
// slots: ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. On Cayman cards, the TRANS
// slot has been removed.
//
//===----------------------------------------------------------------------===//
def ALU_X : FuncUnit;
def ALU_Y : FuncUnit;
def ALU_Z : FuncUnit;
def ALU_W : FuncUnit;
def TRANS : FuncUnit;
def AnyALU : InstrItinClass;
def VecALU : InstrItinClass;
def TransALU : InstrItinClass;
def R600_EG_Itin : ProcessorItineraries <
[ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
[],
[
InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
]
>;

View file

@ -1,151 +0,0 @@
//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass maps the pseudo interpolation registers to the correct physical
// registers. Prior to executing a fragment shader, the GPU loads interpolation
// parameters into physical registers. The specific physical register that each
// interpolation parameter ends up in depends on the type of the interpolation
// parameter as well as how many interpolation parameters are used by the
// shader.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDIL.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
namespace {
class SIAssignInterpRegsPass : public MachineFunctionPass {
private:
static char ID;
TargetMachine &TM;
void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
unsigned physReg, unsigned virtReg);
public:
SIAssignInterpRegsPass(TargetMachine &tm) :
MachineFunctionPass(ID), TM(tm) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const { return "SI Assign interpolation registers"; }
};
} // End anonymous namespace
char SIAssignInterpRegsPass::ID = 0;
#define INTERP_VALUES 16
#define REQUIRED_VALUE_MAX_INDEX 7
struct InterpInfo {
bool Enabled;
unsigned Regs[3];
unsigned RegCount;
};
FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
return new SIAssignInterpRegsPass(tm);
}
bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF)
{
struct InterpInfo InterpUse[INTERP_VALUES] = {
{false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
{false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
{false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
{false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
{false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
{false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
{false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
{false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
{false, {AMDGPU::POS_X_FLOAT}, 1},
{false, {AMDGPU::POS_Y_FLOAT}, 1},
{false, {AMDGPU::POS_Z_FLOAT}, 1},
{false, {AMDGPU::POS_W_FLOAT}, 1},
{false, {AMDGPU::FRONT_FACE}, 1},
{false, {AMDGPU::ANCILLARY}, 1},
{false, {AMDGPU::SAMPLE_COVERAGE}, 1},
{false, {AMDGPU::POS_FIXED_PT}, 1}
};
SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
// This pass is only needed for pixel shaders.
if (MFI->ShaderType != ShaderType::PIXEL) {
return false;
}
MachineRegisterInfo &MRI = MF.getRegInfo();
bool ForceEnable = true;
// First pass, mark the interpolation values that are used.
for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
RegIdx++) {
InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled ||
!MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]);
if (InterpUse[InterpIdx].Enabled &&
InterpIdx <= REQUIRED_VALUE_MAX_INDEX) {
ForceEnable = false;
}
}
}
// At least one interpolation mode must be enabled or else the GPU will hang.
if (ForceEnable) {
InterpUse[0].Enabled = true;
}
unsigned UsedVgprs = 0;
// Second pass, replace with VGPRs.
for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
if (!InterpUse[InterpIdx].Enabled) {
continue;
}
MFI->SPIPSInputAddr |= (1 << InterpIdx);
for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
RegIdx++, UsedVgprs++) {
unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs);
unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg);
addLiveIn(&MF, MRI, NewReg, VirtReg);
}
}
return false;
}
void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF,
MachineRegisterInfo & MRI,
unsigned physReg, unsigned virtReg)
{
const TargetInstrInfo * TII = TM.getInstrInfo();
if (!MRI.isLiveIn(physReg)) {
MRI.addLiveIn(physReg, virtReg);
MF->front().addLiveIn(physReg);
BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
TII->get(TargetOpcode::COPY), virtReg)
.addReg(physReg);
} else {
MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
}
}
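
The pass above boils down to a simple two-pass scheme: mark which interpolation
values the shader actually reads, then hand out VGPRs to the enabled values in
order while accumulating the SPI_PS_INPUT_ADDR enable mask. A minimal
standalone sketch of that bookkeeping (hypothetical values, independent of
LLVM):

#include <cstdio>

int main() {
  // Hypothetical model of the pass's allocation scheme. Each entry stands
  // for one interpolation value: is it used, and how many consecutive VGPRs
  // does it occupy (cf. the InterpUse table in the pass).
  struct { bool Enabled; unsigned RegCount; } Use[4] = {
    {true, 2},   // e.g. PERSP_SAMPLE_I/J
    {false, 2},  // e.g. PERSP_CENTER_I/J
    {true, 3},   // e.g. PERSP_I_W, PERSP_J_W, PERSP_1_W
    {false, 1},  // e.g. LINE_STIPPLE_TEX_COORD
  };
  unsigned InputAddr = 0, UsedVgprs = 0;
  for (unsigned i = 0; i < 4; ++i) {
    if (!Use[i].Enabled)
      continue;
    InputAddr |= 1u << i;          // enable bit i in SPI_PS_INPUT_ADDR
    UsedVgprs += Use[i].RegCount;  // claim the next RegCount VGPRs
  }
  // Prints SPI_PS_INPUT_ADDR=0x5, VGPRs used=5 for the table above.
  std::printf("SPI_PS_INPUT_ADDR=0x%x, VGPRs used=%u\n", InputAddr, UsedVgprs);
  return 0;
}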

View file

@ -1,291 +0,0 @@
#===-- SIGenRegisterInfo.pl - Script for generating register info files ----===#
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
#
# This perl script prints to stdout .td code to be used as SIRegisterInfo.td.
# It also generates a file called SIHwRegInfo.include, which contains helper
# functions for determining the hw encoding of registers.
#
#===------------------------------------------------------------------------===#
use strict;
use warnings;
my $SGPR_COUNT = 104;
my $VGPR_COUNT = 256;
my $SGPR_MAX_IDX = $SGPR_COUNT - 1;
my $VGPR_MAX_IDX = $VGPR_COUNT - 1;
my $INDEX_FILE = defined($ARGV[0]) ? $ARGV[0] : '';
print <<STRING;
let Namespace = "AMDGPU" in {
def low : SubRegIndex;
def high : SubRegIndex;
def sub0 : SubRegIndex;
def sub1 : SubRegIndex;
def sub2 : SubRegIndex;
def sub3 : SubRegIndex;
def sub4 : SubRegIndex;
def sub5 : SubRegIndex;
def sub6 : SubRegIndex;
def sub7 : SubRegIndex;
}
class SIReg <string n> : Register<n> {
let Namespace = "AMDGPU";
}
class SI_64 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
let SubRegIndices = [low, high];
}
class SI_128 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
}
class SI_256 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
}
class SGPR_32 <bits<8> num, string name> : SIReg<name> {
field bits<8> Num;
let Num = num;
}
class VGPR_32 <bits<9> num, string name> : SIReg<name> {
field bits<9> Num;
let Num = num;
}
class SGPR_64 <bits<8> num, string name, list<Register> subregs> :
SI_64 <name, subregs>;
class VGPR_64 <bits<9> num, string name, list<Register> subregs> :
SI_64 <name, subregs>;
class SGPR_128 <bits<8> num, string name, list<Register> subregs> :
SI_128 <name, subregs>;
class VGPR_128 <bits<9> num, string name, list<Register> subregs> :
SI_128 <name, subregs>;
class SGPR_256 <bits<8> num, string name, list<Register> subregs> :
SI_256 <name, subregs>;
def VCC : SIReg<"VCC">;
def EXEC_LO : SIReg<"EXEC LO">;
def EXEC_HI : SIReg<"EXEC HI">;
def EXEC : SI_64<"EXEC", [EXEC_LO,EXEC_HI]>;
def SCC : SIReg<"SCC">;
def SREG_LIT_0 : SIReg <"S LIT 0">;
def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT">;
def M0 : SIReg <"M0">;
//Interpolation registers
def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
def PERSP_CENTROID_J : SIReg <"PERSP_CENTROID_J">;
def PERSP_I_W : SIReg <"PERSP_I_W">;
def PERSP_J_W : SIReg <"PERSP_J_W">;
def PERSP_1_W : SIReg <"PERSP_1_W">;
def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
def FRONT_FACE : SIReg <"FRONT_FACE">;
def ANCILLARY : SIReg <"ANCILLARY">;
def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
STRING
# 32-bit registers
my @SGPR;
for (my $i = 0; $i < $SGPR_COUNT; $i++) {
print "def SGPR$i : SGPR_32 <$i, \"SGPR$i\">;\n";
$SGPR[$i] = "SGPR$i";
}
my @VGPR;
for (my $i = 0; $i < $VGPR_COUNT; $i++) {
print "def VGPR$i : VGPR_32 <$i, \"VGPR$i\">;\n";
$VGPR[$i] = "VGPR$i";
}
print <<STRING;
def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "SGPR%u", 0, $SGPR_MAX_IDX), SREG_LIT_0, M0, EXEC_LO, EXEC_HI)
>;
def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "VGPR%u", 0, $VGPR_MAX_IDX),
PERSP_SAMPLE_I, PERSP_SAMPLE_J,
PERSP_CENTER_I, PERSP_CENTER_J,
PERSP_CENTROID_I, PERSP_CENTROID_J,
PERSP_I_W, PERSP_J_W, PERSP_1_W,
LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
LINEAR_CENTER_I, LINEAR_CENTER_J,
LINEAR_CENTROID_I, LINEAR_CENTROID_J,
LINE_STIPPLE_TEX_COORD,
POS_X_FLOAT,
POS_Y_FLOAT,
POS_Z_FLOAT,
POS_W_FLOAT,
FRONT_FACE,
ANCILLARY,
SAMPLE_COVERAGE,
POS_FIXED_PT
)
>;
def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add VReg_32, SReg_32)
>;
def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>;
def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
STRING
my @subregs_64 = ('low', 'high');
my @subregs_128 = ('sel_x', 'sel_y', 'sel_z', 'sel_w');
my @subregs_256 = ('sub0', 'sub1', 'sub2', 'sub3', 'sub4', 'sub5', 'sub6', 'sub7');
my @SGPR64 = print_sgpr_class(64, \@subregs_64, ('i64'));
my @SGPR128 = print_sgpr_class(128, \@subregs_128, ('v4f32', 'v4i32'));
my @SGPR256 = print_sgpr_class(256, \@subregs_256, ('v8i32'));
my @VGPR64 = print_vgpr_class(64, \@subregs_64, ('i64'));
my @VGPR128 = print_vgpr_class(128, \@subregs_128, ('v4f32'));
my $sgpr64_list = join(',', @SGPR64);
my $vgpr64_list = join(',', @VGPR64);
print <<STRING;
def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64,
(add $sgpr64_list, $vgpr64_list)
>;
STRING
if ($INDEX_FILE ne '') {
open(my $fh, ">", $INDEX_FILE) or die "Can't open $INDEX_FILE: $!";
my %hw_values;
for (my $i = 0; $i <= $#SGPR; $i++) {
push (@{$hw_values{$i}}, $SGPR[$i]);
}
for (my $i = 0; $i <= $#SGPR64; $i++) {
push (@{$hw_values{$i * 2}}, $SGPR64[$i])
}
for (my $i = 0; $i <= $#SGPR128; $i++) {
push (@{$hw_values{$i * 4}}, $SGPR128[$i]);
}
for (my $i = 0; $i <= $#SGPR256; $i++) {
push (@{$hw_values{$i * 8}}, $SGPR256[$i]);
}
for (my $i = 0; $i <= $#VGPR; $i++) {
push (@{$hw_values{$i}}, $VGPR[$i]);
}
for (my $i = 0; $i <= $#VGPR64; $i++) {
push (@{$hw_values{$i * 2}}, $VGPR64[$i]);
}
for (my $i = 0; $i <= $#VGPR128; $i++) {
push (@{$hw_values{$i * 4}}, $VGPR128[$i]);
}
print $fh "unsigned SIRegisterInfo::getHWRegNum(unsigned reg) const\n{\n switch(reg) {\n";
for my $key (keys(%hw_values)) {
my @names = @{$hw_values{$key}};
for my $regname (@names) {
print $fh " case AMDGPU::$regname:\n"
}
print $fh " return $key;\n";
}
print $fh " default: assert(!\"Unknown Register\"); return 0;\n }\n}\n"
}
sub print_sgpr_class {
my ($reg_width, $sub_reg_ref, @types) = @_;
return print_reg_class('SReg', 'SGPR', $reg_width, $SGPR_COUNT, $sub_reg_ref, @types);
}
sub print_vgpr_class {
my ($reg_width, $sub_reg_ref, @types) = @_;
return print_reg_class('VReg', 'VGPR', $reg_width, $VGPR_COUNT, $sub_reg_ref, @types);
}
sub print_reg_class {
my ($class_prefix, $reg_prefix, $reg_width, $reg_count, $sub_reg_ref, @types) = @_;
my @registers;
my $component_count = $reg_width / 32;
for (my $i = 0; $i < $reg_count; $i += $component_count) {
my $reg_name = $reg_prefix . $i . '_' . $reg_width;
my @sub_regs;
for (my $idx = 0; $idx < $component_count; $idx++) {
my $sub_idx = $i + $idx;
push(@sub_regs, $reg_prefix . $sub_idx);
}
print "def $reg_name : $reg_prefix\_$reg_width <$i, \"$reg_name\", [ ", join(',', @sub_regs) , "]>;\n";
push (@registers, $reg_name);
}
#Add VCC to SReg_64
if ($class_prefix eq 'SReg' and $reg_width == 64) {
push (@registers, 'VCC')
}
#Add EXEC to SReg_64
if ($class_prefix eq 'SReg' and $reg_width == 64) {
push (@registers, 'EXEC')
}
my $reg_list = join(', ', @registers);
print "def $class_prefix\_$reg_width : RegisterClass<\"AMDGPU\", [" . join (', ', @types) . "], $reg_width,\n (add $reg_list)\n>{\n";
print " let SubRegClasses = [($class_prefix\_", ($reg_width / $component_count) , ' ', join(', ', @{$sub_reg_ref}), ")];\n}\n";
return @registers;
}
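
For reference, the helper emitted into the index file has roughly this shape
(an illustrative excerpt only; the real case ordering depends on Perl hash
iteration, and every register name comes from the tables built above):

unsigned SIRegisterInfo::getHWRegNum(unsigned reg) const
{
  switch(reg) {
  // 32-bit registers map directly; wider registers map to the index of
  // their first component (index * 2 for 64-bit, * 4 for 128-bit, ...).
  case AMDGPU::SGPR0:
  case AMDGPU::SGPR0_64:
  case AMDGPU::SGPR0_128:
  case AMDGPU::VGPR0:
    return 0;
  case AMDGPU::SGPR2:
  case AMDGPU::SGPR2_64:
  case AMDGPU::VGPR2:
  case AMDGPU::VGPR2_64:
    return 2;
  default: assert(!"Unknown Register"); return 0;
  }
}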

View file

@ -1,466 +0,0 @@
//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
// mostly implements EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//
#include "SIISelLowering.h"
#include "AMDIL.h"
#include "AMDILIntrinsicInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
SITargetLowering::SITargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM),
TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo()))
{
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
addRegisterClass(MVT::i64, &AMDGPU::VReg_64RegClass);
addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass);
addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
computeRegisterProperties();
setOperationAction(ISD::AND, MVT::i1, Custom);
setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::ADD, MVT::i32, Legal);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// We need to custom lower loads from the USER_SGPR address space, so we can
// add the SGPRs as livein registers.
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
}
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const
{
const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
MachineBasicBlock::iterator I = MI;
if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) {
AppendS_WAITCNT(MI, *BB, llvm::next(I));
return BB;
}
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::BRANCH: return BB;
case AMDGPU::CLAMP_SI:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
// VSRC1-2 are unused, but we still need to fill all the
// operand slots, so we just reuse the VSRC0 operand
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(1))
.addImm(0) // ABS
.addImm(1) // CLAMP
.addImm(0) // OMOD
.addImm(0); // NEG
MI->eraseFromParent();
break;
case AMDGPU::FABS_SI:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
// VSRC1-2 are unused, but we still need to fill all the
// operand slots, so we just reuse the VSRC0 operand
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(1))
.addImm(1) // ABS
.addImm(0) // CLAMP
.addImm(0) // OMOD
.addImm(0); // NEG
MI->eraseFromParent();
break;
case AMDGPU::FNEG_SI:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
// VSRC1-2 are unused, but we still need to fill all the
// operand slots, so we just reuse the VSRC0 operand
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(1))
.addImm(0) // ABS
.addImm(0) // CLAMP
.addImm(0) // OMOD
.addImm(1); // NEG
MI->eraseFromParent();
break;
case AMDGPU::SHADER_TYPE:
BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType =
MI->getOperand(0).getImm();
MI->eraseFromParent();
break;
case AMDGPU::SI_INTERP:
LowerSI_INTERP(MI, *BB, I, MRI);
break;
case AMDGPU::SI_INTERP_CONST:
LowerSI_INTERP_CONST(MI, *BB, I, MRI);
break;
case AMDGPU::SI_KIL:
LowerSI_KIL(MI, *BB, I, MRI);
break;
case AMDGPU::SI_WQM:
LowerSI_WQM(MI, *BB, I, MRI);
break;
case AMDGPU::SI_V_CNDLT:
LowerSI_V_CNDLT(MI, *BB, I, MRI);
break;
}
return BB;
}
void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I) const
{
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT))
.addImm(0);
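// Note: the zero immediate encodes vmcnt = expcnt = lgkmcnt = 0, i.e. wait
// until every outstanding memory, export and LDS operation has completed,
// which is what makes this placement maximally conservative.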
}
void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
{
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC)
.addReg(AMDGPU::EXEC);
MI->eraseFromParent();
}
void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
{
unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
MachineOperand dst = MI->getOperand(0);
MachineOperand iReg = MI->getOperand(1);
MachineOperand jReg = MI->getOperand(2);
MachineOperand attr_chan = MI->getOperand(3);
MachineOperand attr = MI->getOperand(4);
MachineOperand params = MI->getOperand(5);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
.addOperand(params);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
.addOperand(iReg)
.addOperand(attr_chan)
.addOperand(attr)
.addReg(M0);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
.addOperand(dst)
.addReg(tmp)
.addOperand(jReg)
.addOperand(attr_chan)
.addOperand(attr)
.addReg(M0);
MI->eraseFromParent();
}
void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
MachineBasicBlock &BB, MachineBasicBlock::iterator I,
MachineRegisterInfo &MRI) const
{
MachineOperand dst = MI->getOperand(0);
MachineOperand attr_chan = MI->getOperand(1);
MachineOperand attr = MI->getOperand(2);
MachineOperand params = MI->getOperand(3);
unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
.addOperand(params);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
.addOperand(dst)
.addOperand(attr_chan)
.addOperand(attr)
.addReg(M0);
MI->eraseFromParent();
}
void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
{
// Clear this pixel from the exec mask if the operand is negative
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32),
AMDGPU::VCC)
.addReg(AMDGPU::SREG_LIT_0)
.addOperand(MI->getOperand(0));
// If the exec mask is non-zero, skip the next two instructions
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ))
.addImm(3)
.addReg(AMDGPU::EXEC);
// Exec mask is zero: Export to NULL target...
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::EXP))
.addImm(0)
.addImm(0x09) // V_008DFC_SQ_EXP_NULL
.addImm(0)
.addImm(1)
.addImm(1)
.addReg(AMDGPU::SREG_LIT_0)
.addReg(AMDGPU::SREG_LIT_0)
.addReg(AMDGPU::SREG_LIT_0)
.addReg(AMDGPU::SREG_LIT_0);
// ... and terminate wavefront
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM));
MI->eraseFromParent();
}
void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
{
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMP_GT_F32_e32),
AMDGPU::VCC)
.addReg(AMDGPU::SREG_LIT_0)
.addOperand(MI->getOperand(1));
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(3))
.addOperand(MI->getOperand(2))
.addReg(AMDGPU::VCC);
MI->eraseFromParent();
}
EVT SITargetLowering::getSetCCResultType(EVT VT) const
{
return MVT::i1;
}
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT VT = Op.getValueType();
switch (IntrinsicID) {
case AMDGPUIntrinsic::SI_vs_load_buffer_index:
return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
AMDGPU::VGPR0, VT);
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
break;
}
}
return SDValue();
}
/// Loweri1ContextSwitch - The function is for lowering i1 operations on the
/// VCC register. In the VALU context, VCC is a one bit register, but in the
/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only
/// the SALU can perform operations on the VCC register, we need to promote
/// the operand types from i1 to i64 in order for tablegen to be able to match
/// this operation to the correct SALU instruction. We do this promotion by
/// wrapping the operands in a CopyToReg node.
///
SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op,
SelectionDAG &DAG,
unsigned VCCNode) const
{
DebugLoc DL = Op.getDebugLoc();
SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64,
DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
Op.getOperand(0)),
DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
Op.getOperand(1)));
return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode);
}
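// For illustration (hypothetical DAG, not taken from a real shader): an i1
// AND of two compare results is rewritten as
//
//   (i1 and a, b)
//     -> (i1 VCC_BITCAST (i64 and (i64 VCC_BITCAST a), (i64 VCC_BITCAST b)))
//
// so tablegen can match the inner i64 operation to a 64-bit SALU instruction
// (e.g. S_AND_B64) while the outer bitcast keeps the result usable as i1.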
SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
SDValue Chain = Op.getOperand(0);
SDValue CC = Op.getOperand(1);
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue JumpT = Op.getOperand(4);
SDValue CmpValue;
SDValue Result;
CmpValue = DAG.getNode(
ISD::SETCC,
Op.getDebugLoc(),
MVT::i1,
LHS, RHS,
CC);
Result = DAG.getNode(
AMDGPUISD::BRANCH_COND,
CmpValue.getDebugLoc(),
MVT::Other, Chain,
JumpT, CmpValue);
return Result;
}
SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
EVT VT = Op.getValueType();
LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op);
assert(Ptr);
unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace();
// We only need to lower USER_SGPR address space loads
if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) {
return SDValue();
}
// Loads from the USER_SGPR address space can only have constant value
// pointers.
ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr());
assert(BasePtr);
unsigned TypeDwordWidth = VT.getSizeInBits() / 32;
const TargetRegisterClass * dstClass;
switch (TypeDwordWidth) {
default:
assert(!"USER_SGPR value size not implemented");
return SDValue();
case 1:
dstClass = &AMDGPU::SReg_32RegClass;
break;
case 2:
dstClass = &AMDGPU::SReg_64RegClass;
break;
}
uint64_t Index = BasePtr->getZExtValue();
assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
unsigned SGPRIndex = Index / TypeDwordWidth;
unsigned Reg = dstClass->getRegister(SGPRIndex);
DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg,
VT));
return SDValue();
}
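// Worked example (hypothetical numbers): an i64 load from constant address 2
// in the USER_SGPR space has TypeDwordWidth = 64 / 32 = 2, so
// SGPRIndex = 2 / 2 = 1 and the load is rewritten into a live-in copy of
// the second register in SReg_64.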
SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue True = Op.getOperand(2);
SDValue False = Op.getOperand(3);
SDValue CC = Op.getOperand(4);
EVT VT = Op.getValueType();
DebugLoc DL = Op.getDebugLoc();
SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}
//===----------------------------------------------------------------------===//
// Custom DAG optimizations
//===----------------------------------------------------------------------===//
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
DebugLoc DL = N->getDebugLoc();
EVT VT = N->getValueType(0);
switch (N->getOpcode()) {
default: break;
case ISD::SELECT_CC: {
ConstantSDNode *True, *False;
// i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
&& (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
&& True->isAllOnesValue()
&& False->isNullValue()
&& VT == MVT::i1) {
return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
N->getOperand(1), N->getOperand(4));
}
break;
}
case ISD::SETCC: {
SDValue Arg0 = N->getOperand(0);
SDValue Arg1 = N->getOperand(1);
SDValue CC = N->getOperand(2);
ConstantSDNode * C = NULL;
ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
// i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
if (VT == MVT::i1
&& Arg0.getOpcode() == ISD::SIGN_EXTEND
&& Arg0.getOperand(0).getValueType() == MVT::i1
&& (C = dyn_cast<ConstantSDNode>(Arg1))
&& C->isNullValue()
&& CCOp == ISD::SETNE) {
return SimplifySetCC(VT, Arg0.getOperand(0),
DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
}
break;
}
}
return SDValue();
}
#define NODE_NAME_CASE(node) case SIISD::node: return #node;
const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const
{
switch (Opcode) {
default: return AMDGPUTargetLowering::getTargetNodeName(Opcode);
NODE_NAME_CASE(VCC_AND)
NODE_NAME_CASE(VCC_BITCAST)
}
}

View file

@ -1,63 +0,0 @@
//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// SI DAG Lowering interface definition
//
//===----------------------------------------------------------------------===//
#ifndef SIISELLOWERING_H
#define SIISELLOWERING_H
#include "AMDGPUISelLowering.h"
#include "SIInstrInfo.h"
namespace llvm {
class SITargetLowering : public AMDGPUTargetLowering
{
const SIInstrInfo * TII;
/// AppendS_WAITCNT - Memory reads and writes are synchronized using the
/// S_WAITCNT instruction. This function takes the most conservative
/// approach and inserts an S_WAITCNT instruction after every read and
/// write.
void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I) const;
void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, unsigned Opcode) const;
void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG,
unsigned VCCNode) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
public:
SITargetLowering(TargetMachine &tm);
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
MachineBasicBlock * BB) const;
virtual EVT getSetCCResultType(EVT VT) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual const char* getTargetNodeName(unsigned Opcode) const;
};
} // End namespace llvm
#endif //SIISELLOWERING_H

View file

@ -1,131 +0,0 @@
//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// SI Instruction format definitions.
//
// Instructions with _32 take 32-bit operands.
// Instructions with _64 take 64-bit operands.
//
// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
// encoding is the standard encoding, but instructions that make use of
// any of the instruction modifiers must use the 64-bit encoding.
//
// Instructions with _e32 use the 32-bit encoding.
// Instructions with _e64 use the 64-bit encoding.
//
//===----------------------------------------------------------------------===//
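// For example, a VOP1 instruction defined through the multiclasses below
// yields both an _e32 variant (compact 32-bit encoding) and an _e64 variant
// (VOP3 encoding with the ABS/CLAMP/OMOD/NEG modifier operands); see
// V_MOV_B32_e32 / V_MOV_B32_e64, which are referenced elsewhere in this
// backend.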
class VOP3_32 <bits<9> op, string opName, list<dag> pattern>
: VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
class VOP3_64 <bits<9> op, string opName, list<dag> pattern>
: VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, AllReg_64:$src1, AllReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
class SOP1_32 <bits<8> op, string opName, list<dag> pattern>
: SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>;
class SOP1_64 <bits<8> op, string opName, list<dag> pattern>
: SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>;
class SOP2_32 <bits<7> op, string opName, list<dag> pattern>
: SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
class SOP2_64 <bits<7> op, string opName, list<dag> pattern>
: SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
class SOP2_VCC <bits<7> op, string opName, list<dag> pattern>
: SOP2 <op, (outs VCCReg:$vcc), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
string opName, list<dag> pattern> :
VOP1 <
op, (outs vrc:$dst), (ins arc:$src0), opName, pattern
>;
multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> {
def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>;
def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
opName, []
>;
}
multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> {
def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>;
def _e64 : VOP3_64 <
{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
opName, []
>;
}
class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
string opName, list<dag> pattern> :
VOP2 <
op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern
>;
multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> {
def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>;
def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
opName, []
>;
}
multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> {
def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>;
def _e64 : VOP3_64 <
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
opName, []
>;
}
class SOPK_32 <bits<5> op, string opName, list<dag> pattern>
: SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>;
class SOPK_64 <bits<5> op, string opName, list<dag> pattern>
: SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>;
class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
string opName, list<dag> pattern> :
VOPC <
op, (ins arc:$src0, vrc:$src1), opName, pattern
>;
multiclass VOPC_32 <bits<8> op, string opName, list<dag> pattern> {
def _e32 : VOPC_Helper <op, VReg_32, AllReg_32, opName, pattern>;
def _e64 : VOP3_32 <
{0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
opName, []
>;
}
multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> {
def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>;
def _e64 : VOP3_64 <
{0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
opName, []
>;
}
class SOPC_32 <bits<7> op, string opName, list<dag> pattern>
: SOPC <op, (outs SCCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
class SOPC_64 <bits<7> op, string opName, list<dag> pattern>
: SOPC <op, (outs SCCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;

View file

@ -1,76 +0,0 @@
//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//
#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include <stdio.h>
using namespace llvm;
SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
: AMDGPUInstrInfo(tm),
RI(tm, *this),
TM(tm)
{ }
const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const
{
return RI;
}
void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const
{
// If we are trying to copy to or from SCC, there is a bug somewhere else in
// the backend. While it may be theoretically possible to do this, it should
// never be necessary.
assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
}
MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
int64_t Imm) const
{
MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc());
MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
MachineInstrBuilder(MI).addImm(Imm);
return MI;
}
bool SIInstrInfo::isMov(unsigned Opcode) const
{
switch(Opcode) {
default: return false;
case AMDGPU::S_MOV_B32:
case AMDGPU::S_MOV_B64:
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_IMM_F32:
case AMDGPU::V_MOV_IMM_I32:
case AMDGPU::S_MOV_IMM_I32:
return true;
}
}

View file

@ -1,62 +0,0 @@
//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Interface definition for SIInstrInfo.
//
//===----------------------------------------------------------------------===//
#ifndef SIINSTRINFO_H
#define SIINSTRINFO_H
#include "AMDGPUInstrInfo.h"
#include "SIRegisterInfo.h"
namespace llvm {
class SIInstrInfo : public AMDGPUInstrInfo {
private:
const SIRegisterInfo RI;
AMDGPUTargetMachine &TM;
public:
explicit SIInstrInfo(AMDGPUTargetMachine &tm);
const SIRegisterInfo &getRegisterInfo() const;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
/// getEncodingType - Returns the encoding type of this instruction.
unsigned getEncodingType(const MachineInstr &MI) const;
/// getEncodingBytes - Returns the size of this instructions encoding in
/// number of bytes.
unsigned getEncodingBytes(const MachineInstr &MI) const;
virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
int64_t Imm) const;
virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
virtual bool isMov(unsigned Opcode) const;
};
} // End namespace llvm
namespace SIInstrFlags {
enum Flags {
// First 4 bits are the instruction encoding
NEED_WAIT = 1 << 4
};
}
#endif //SIINSTRINFO_H

View file

@ -1,506 +0,0 @@
//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SI DAG Profiles
//===----------------------------------------------------------------------===//
def SDTVCCBinaryOp : SDTypeProfile<1, 2, [
SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>
]>;
//===----------------------------------------------------------------------===//
// SI DAG Nodes
//===----------------------------------------------------------------------===//
// and operation on 64-bit wide vcc
def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
[SDNPCommutative, SDNPAssociative]
>;
// Special bitcast node for sharing VCC register between VALU and SALU
def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST",
SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
>;
class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
AMDGPUInst<outs, ins, asm, pattern> {
field bits<4> EncodingType = 0;
field bits<1> NeedWait = 0;
let TSFlags{3-0} = EncodingType;
let TSFlags{4} = NeedWait;
}
class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
field bits<32> Inst;
}
class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
field bits<64> Inst;
}
class SIOperand <ValueType vt, dag opInfo>: Operand <vt> {
let EncoderMethod = "encodeOperand";
let MIOperandInfo = opInfo;
}
def IMM16bit : ImmLeaf <
i16,
[{return isInt<16>(Imm);}]
>;
def IMM8bit : ImmLeaf <
i32,
[{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}]
>;
def IMM12bit : ImmLeaf <
i16,
[{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}]
>;
def IMM32bitIn64bit : ImmLeaf <
i64,
[{return isInt<32>(Imm);}]
>;
class GPR4Align <RegisterClass rc> : Operand <vAny> {
let EncoderMethod = "GPR4AlignEncode";
let MIOperandInfo = (ops rc:$reg);
}
class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> {
let EncoderMethod = "GPR2AlignEncode";
let MIOperandInfo = (ops rc:$reg);
}
def SMRDmemrr : Operand<iPTR> {
let MIOperandInfo = (ops SReg_64, SReg_32);
let EncoderMethod = "GPR2AlignEncode";
}
def SMRDmemri : Operand<iPTR> {
let MIOperandInfo = (ops SReg_64, i32imm);
let EncoderMethod = "SMRDmemriEncode";
}
def ADDR_Reg : ComplexPattern<i64, 2, "SelectADDRReg", [], []>;
def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>;
let Uses = [EXEC] in {
def EXP : Enc64<
(outs),
(ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
"EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
[] > {
bits<4> EN;
bits<6> TGT;
bits<1> COMPR;
bits<1> DONE;
bits<1> VM;
bits<8> VSRC0;
bits<8> VSRC1;
bits<8> VSRC2;
bits<8> VSRC3;
let Inst{3-0} = EN;
let Inst{9-4} = TGT;
let Inst{10} = COMPR;
let Inst{11} = DONE;
let Inst{12} = VM;
let Inst{31-26} = 0x3e;
let Inst{39-32} = VSRC0;
let Inst{47-40} = VSRC1;
let Inst{55-48} = VSRC2;
let Inst{63-56} = VSRC3;
let EncodingType = 0; //SIInstrEncodingType::EXP
let NeedWait = 1;
let usesCustomInserter = 1;
}
class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc64 <outs, ins, asm, pattern> {
bits<8> VDATA;
bits<4> DMASK;
bits<1> UNORM;
bits<1> GLC;
bits<1> DA;
bits<1> R128;
bits<1> TFE;
bits<1> LWE;
bits<1> SLC;
bits<8> VADDR;
bits<5> SRSRC;
bits<5> SSAMP;
let Inst{11-8} = DMASK;
let Inst{12} = UNORM;
let Inst{13} = GLC;
let Inst{14} = DA;
let Inst{15} = R128;
let Inst{16} = TFE;
let Inst{17} = LWE;
let Inst{24-18} = op;
let Inst{25} = SLC;
let Inst{31-26} = 0x3c;
let Inst{39-32} = VADDR;
let Inst{47-40} = VDATA;
let Inst{52-48} = SRSRC;
let Inst{57-53} = SSAMP;
let EncodingType = 2; //SIInstrEncodingType::MIMG
let NeedWait = 1;
let usesCustomInserter = 1;
}
class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc64<outs, ins, asm, pattern> {
bits<8> VDATA;
bits<12> OFFSET;
bits<1> OFFEN;
bits<1> IDXEN;
bits<1> GLC;
bits<1> ADDR64;
bits<4> DFMT;
bits<3> NFMT;
bits<8> VADDR;
bits<5> SRSRC;
bits<1> SLC;
bits<1> TFE;
bits<8> SOFFSET;
let Inst{11-0} = OFFSET;
let Inst{12} = OFFEN;
let Inst{13} = IDXEN;
let Inst{14} = GLC;
let Inst{15} = ADDR64;
let Inst{18-16} = op;
let Inst{22-19} = DFMT;
let Inst{25-23} = NFMT;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = VADDR;
let Inst{47-40} = VDATA;
let Inst{52-48} = SRSRC;
let Inst{54} = SLC;
let Inst{55} = TFE;
let Inst{63-56} = SOFFSET;
let EncodingType = 3; //SIInstrEncodingType::MTBUF
let NeedWait = 1;
let usesCustomInserter = 1;
let neverHasSideEffects = 1;
}
class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc64<outs, ins, asm, pattern> {
bits<8> VDATA;
bits<12> OFFSET;
bits<1> OFFEN;
bits<1> IDXEN;
bits<1> GLC;
bits<1> ADDR64;
bits<1> LDS;
bits<8> VADDR;
bits<5> SRSRC;
bits<1> SLC;
bits<1> TFE;
bits<8> SOFFSET;
let Inst{11-0} = OFFSET;
let Inst{12} = OFFEN;
let Inst{13} = IDXEN;
let Inst{14} = GLC;
let Inst{15} = ADDR64;
let Inst{16} = LDS;
let Inst{24-18} = op;
let Inst{31-26} = 0x38; //encoding
let Inst{39-32} = VADDR;
let Inst{47-40} = VDATA;
let Inst{52-48} = SRSRC;
let Inst{54} = SLC;
let Inst{55} = TFE;
let Inst{63-56} = SOFFSET;
let EncodingType = 4; //SIInstrEncodingType::MUBUF
let NeedWait = 1;
let usesCustomInserter = 1;
let neverHasSideEffects = 1;
}
} // End Uses = [EXEC]
class SMRD <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32<outs, ins, asm, pattern> {
bits<7> SDST;
bits<15> PTR;
bits<8> OFFSET = PTR{7-0};
bits<1> IMM = PTR{8};
bits<6> SBASE = PTR{14-9};
let Inst{7-0} = OFFSET;
let Inst{8} = IMM;
let Inst{14-9} = SBASE;
let Inst{21-15} = SDST;
let Inst{26-22} = op;
let Inst{31-27} = 0x18; //encoding
let EncodingType = 5; //SIInstrEncodingType::SMRD
let NeedWait = 1;
let usesCustomInserter = 1;
}
class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32<outs, ins, asm, pattern> {
bits<7> SDST;
bits<8> SSRC0;
let Inst{7-0} = SSRC0;
let Inst{15-8} = op;
let Inst{22-16} = SDST;
let Inst{31-23} = 0x17d; //encoding;
let EncodingType = 6; //SIInstrEncodingType::SOP1
}
class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32 <outs, ins, asm, pattern> {
bits<7> SDST;
bits<8> SSRC0;
bits<8> SSRC1;
let Inst{7-0} = SSRC0;
let Inst{15-8} = SSRC1;
let Inst{22-16} = SDST;
let Inst{29-23} = op;
let Inst{31-30} = 0x2; // encoding
let EncodingType = 7; // SIInstrEncodingType::SOP2
}
class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32<outs, ins, asm, pattern> {
bits<8> SSRC0;
bits<8> SSRC1;
let Inst{7-0} = SSRC0;
let Inst{15-8} = SSRC1;
let Inst{22-16} = op;
let Inst{31-23} = 0x17e;
let EncodingType = 8; // SIInstrEncodingType::SOPC
let DisableEncoding = "$dst";
}
class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32 <outs, ins , asm, pattern> {
bits <7> SDST;
bits <16> SIMM16;
let Inst{15-0} = SIMM16;
let Inst{22-16} = SDST;
let Inst{27-23} = op;
let Inst{31-28} = 0xb; //encoding
let EncodingType = 9; // SIInstrEncodingType::SOPK
}
class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
(outs),
ins,
asm,
pattern > {
bits <16> SIMM16;
let Inst{15-0} = SIMM16;
let Inst{22-16} = op;
let Inst{31-23} = 0x17f; // encoding
let EncodingType = 10; // SIInstrEncodingType::SOPP
}
let Uses = [EXEC] in {
class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32 <outs, ins, asm, pattern> {
bits<8> VDST;
bits<8> VSRC;
bits<2> ATTRCHAN;
bits<6> ATTR;
let Inst{7-0} = VSRC;
let Inst{9-8} = ATTRCHAN;
let Inst{15-10} = ATTR;
let Inst{17-16} = op;
let Inst{25-18} = VDST;
let Inst{31-26} = 0x32; // encoding
let EncodingType = 11; // SIInstrEncodingType::VINTRP
let neverHasSideEffects = 1;
}
class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32 <outs, ins, asm, pattern> {
bits<8> VDST;
bits<9> SRC0;
let Inst{8-0} = SRC0;
let Inst{16-9} = op;
let Inst{24-17} = VDST;
let Inst{31-25} = 0x3f; //encoding
let EncodingType = 12; // SIInstrEncodingType::VOP1
let PostEncoderMethod = "VOPPostEncode";
}
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32 <outs, ins, asm, pattern> {
bits<8> VDST;
bits<9> SRC0;
bits<8> VSRC1;
let Inst{8-0} = SRC0;
let Inst{16-9} = VSRC1;
let Inst{24-17} = VDST;
let Inst{30-25} = op;
let Inst{31} = 0x0; //encoding
let EncodingType = 13; // SIInstrEncodingType::VOP2
let PostEncoderMethod = "VOPPostEncode";
}
class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc64 <outs, ins, asm, pattern> {
bits<8> VDST;
bits<9> SRC0;
bits<9> SRC1;
bits<9> SRC2;
bits<3> ABS;
bits<1> CLAMP;
bits<2> OMOD;
bits<3> NEG;
let Inst{7-0} = VDST;
let Inst{10-8} = ABS;
let Inst{11} = CLAMP;
let Inst{25-17} = op;
let Inst{31-26} = 0x34; //encoding
let Inst{40-32} = SRC0;
let Inst{49-41} = SRC1;
let Inst{58-50} = SRC2;
let Inst{60-59} = OMOD;
let Inst{63-61} = NEG;
let EncodingType = 14; // SIInstrEncodingType::VOP3
let PostEncoderMethod = "VOPPostEncode";
}
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
bits<9> SRC0;
bits<8> VSRC1;
let Inst{8-0} = SRC0;
let Inst{16-9} = VSRC1;
let Inst{24-17} = op;
let Inst{31-25} = 0x3e;
let EncodingType = 15; //SIInstrEncodingType::VOPC
let PostEncoderMethod = "VOPPostEncode";
let DisableEncoding = "$dst";
}
} // End Uses = [EXEC]
class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
op,
(outs VReg_128:$vdata),
(ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr,
GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp),
asm,
[]
>;
class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
op,
(outs regClass:$dst),
(ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc,
i1imm:$tfe, SReg_32:$soffset),
asm,
[]> {
let mayLoad = 1;
}
class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
(outs regClass:$dst),
(ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc,
i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
asm,
[]> {
let mayLoad = 1;
}
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
(outs),
(ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
asm,
[]> {
let mayStore = 1;
}
multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass,
ValueType vt> {
def _IMM : SMRD <
op,
(outs dstClass:$dst),
(ins SMRDmemri:$src0),
asm,
[(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))]
>;
def _SGPR : SMRD <
op,
(outs dstClass:$dst),
(ins SMRDmemrr:$src0),
asm,
[(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))]
>;
}
multiclass SMRD_32 <bits<5> op, string asm, RegisterClass dstClass> {
defm _F32 : SMRD_Helper <op, asm, dstClass, f32>;
defm _I32 : SMRD_Helper <op, asm, dstClass, i32>;
}
include "SIInstrFormats.td"
include "SIInstructions.td"

File diff suppressed because it is too large

View file

@ -1,40 +0,0 @@
//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// SI Intrinsic Definitions
//
//===----------------------------------------------------------------------===//
let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
/* XXX: We may need a separate intrinsic here for loading integer values */
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ;
def int_SI_wqm : Intrinsic <[], [], []>;
def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>;
/* Interpolation Intrinsics */
def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>;
class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
def int_SI_fs_interp_linear_center : Interp;
def int_SI_fs_interp_linear_centroid : Interp;
def int_SI_fs_interp_persp_center : Interp;
def int_SI_fs_interp_persp_centroid : Interp;
def int_SI_fs_interp_constant : Interp;
def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>;
def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
}

View file

@ -1,161 +0,0 @@
//===-- SILowerFlowControl.cpp - Use predicates for flow control ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass lowers the pseudo flow control instructions (SI_IF_NZ, ELSE, ENDIF)
// to predicated instructions.
//
// All flow control (except loops) is handled using predicated instructions and
// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
// by writing to the 64-bit EXEC register (each bit corresponds to a
// single vector ALU). Typically, for predicates, a vector ALU will write
// to its bit of the VCC register (like EXEC, VCC is 64 bits wide, one bit
// for each Vector ALU) and then the Scalar ALU will AND the VCC register
// with EXEC to update the predicates.
//
// For example:
// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
// SI_IF_NZ %VCC
// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
// ELSE
// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
// ENDIF
//
// becomes:
//
// %SGPR0 = S_MOV_B64 %EXEC // Save the current exec mask
// %EXEC = S_AND_B64 %VCC, %EXEC // Update the exec mask
// S_CBRANCH_EXECZ label0 // This instruction is an
// // optimization which allows us to
// // branch if all the bits of
// // EXEC are zero.
// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
//
// label0:
// %EXEC = S_NOT_B64 %EXEC // Invert the exec mask for the
// // ELSE block.
// %EXEC = S_AND_B64 %SGPR0, %EXEC
// S_CBRANCH_EXECZ label1 // Use our branch optimization
// // instruction again.
// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 // Do the ELSE block
// label1:
// %EXEC = S_MOV_B64 %SGPR0 // Restore the old EXEC value
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
namespace {
class SILowerFlowControlPass : public MachineFunctionPass {
private:
static char ID;
const TargetInstrInfo *TII;
std::vector<unsigned> PredicateStack;
std::vector<unsigned> UnusedRegisters;
void pushExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
void popExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
public:
SILowerFlowControlPass(TargetMachine &tm) :
MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const {
return "SI Lower flow control instructions";
}
};
} // End anonymous namespace
char SILowerFlowControlPass::ID = 0;
FunctionPass *llvm::createSILowerFlowControlPass(TargetMachine &tm) {
return new SILowerFlowControlPass(tm);
}
bool SILowerFlowControlPass::runOnMachineFunction(MachineFunction &MF) {
// Find all the unused registers that can be used for the predicate stack.
for (TargetRegisterClass::iterator I = AMDGPU::SReg_64RegClass.begin(),
E = AMDGPU::SReg_64RegClass.end();
I != E; ++I) {
unsigned Reg = *I;
if (!MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
UnusedRegisters.push_back(Reg);
}
}
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
I != MBB.end(); I = Next, Next = llvm::next(I)) {
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
default: break;
case AMDGPU::SI_IF_NZ:
pushExecMask(MBB, I);
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
AMDGPU::EXEC)
.addOperand(MI.getOperand(0)) // VCC
.addReg(AMDGPU::EXEC);
MI.eraseFromParent();
break;
case AMDGPU::ELSE:
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_NOT_B64),
AMDGPU::EXEC)
.addReg(AMDGPU::EXEC);
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
AMDGPU::EXEC)
.addReg(PredicateStack.back())
.addReg(AMDGPU::EXEC);
MI.eraseFromParent();
break;
case AMDGPU::ENDIF:
popExecMask(MBB, I);
MI.eraseFromParent();
break;
}
}
}
return false;
}
void SILowerFlowControlPass::pushExecMask(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
assert(!UnusedRegisters.empty() && "Ran out of registers for predicate stack");
unsigned StackReg = UnusedRegisters.back();
UnusedRegisters.pop_back();
PredicateStack.push_back(StackReg);
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
StackReg)
.addReg(AMDGPU::EXEC);
}
void SILowerFlowControlPass::popExecMask(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
unsigned StackReg = PredicateStack.back();
PredicateStack.pop_back();
UnusedRegisters.push_back(StackReg);
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
AMDGPU::EXEC)
.addReg(StackReg);
}
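
The push/pop discipline above can be modeled in isolation. A minimal sketch
(hypothetical, independent of LLVM) of how SI_IF_NZ, ELSE and ENDIF manipulate
a 64-lane exec mask through the predicate stack:

#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical 64-lane model of the pass's predicate-stack discipline.
struct ExecModel {
  uint64_t Exec = ~0ull;           // one bit per vector lane
  std::vector<uint64_t> Stack;     // the "predicate stack" of saved masks

  void ifNZ(uint64_t Vcc) {        // SI_IF_NZ: save EXEC, then AND with VCC
    Stack.push_back(Exec);
    Exec &= Vcc;
  }
  void elseBlock() {               // ELSE: invert, then AND with saved mask
    Exec = ~Exec & Stack.back();
  }
  void endIf() {                   // ENDIF: restore the saved mask
    Exec = Stack.back();
    Stack.pop_back();
  }
};

int main() {
  ExecModel M;
  M.ifNZ(0x0F);                    // lanes 0-3 execute the IF side
  assert(M.Exec == 0x0F);
  M.elseBlock();                   // the complementary lanes run the ELSE side
  assert(M.Exec == ~0x0Full);
  M.endIf();                       // all lanes active again
  assert(M.Exec == ~0ull);
  return 0;
}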

View file

@ -1,105 +0,0 @@
//===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This pass performs the following transformation on instructions with
// literal constants:
//
// %VGPR0 = V_MOV_IMM_I32 1
//
// becomes:
//
// BUNDLE
// * %VGPR0 = V_MOV_B32_e32 SI_LITERAL_CONSTANT
// * SI_LOAD_LITERAL 1
//
// The resulting sequence matches exactly how the hardware handles immediate
// operands, so this transformation greatly simplifies the code generator.
//
// Only the *_MOV_IMM_* instructions support immediate operands at the moment,
// but when support for immediate operands is added to other instructions,
// they will be lowered here as well.
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
using namespace llvm;
namespace {
class SILowerLiteralConstantsPass : public MachineFunctionPass {
private:
static char ID;
const TargetInstrInfo *TII;
public:
SILowerLiteralConstantsPass(TargetMachine &tm) :
MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const {
return "SI Lower literal constants pass";
}
};
} // End anonymous namespace
char SILowerLiteralConstantsPass::ID = 0;
FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) {
return new SILowerLiteralConstantsPass(tm);
}
bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
for (MachineBasicBlock::iterator I = MBB.begin(), Next = I;
I != MBB.end(); I = Next) {
// Advance Next first so erasing MI below cannot invalidate the iterator.
Next = llvm::next(I);
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
default: break;
case AMDGPU::S_MOV_IMM_I32:
case AMDGPU::S_MOV_IMM_I64:
case AMDGPU::V_MOV_IMM_F32:
case AMDGPU::V_MOV_IMM_I32: {
unsigned MovOpcode;
unsigned LoadLiteralOpcode;
MachineOperand LiteralOp = MI.getOperand(1);
if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) {
MovOpcode = AMDGPU::V_MOV_B32_e32;
} else {
MovOpcode = AMDGPU::S_MOV_B32;
}
if (LiteralOp.isImm()) {
LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32;
} else {
LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32;
}
MachineInstr *First =
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MovOpcode),
MI.getOperand(0).getReg())
.addReg(AMDGPU::SI_LITERAL_CONSTANT);
MachineInstr *Last =
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(LoadLiteralOpcode))
.addOperand(MI.getOperand(1));
Last->setIsInsideBundle();
llvm::finalizeBundle(MBB, First, Last);
MI.eraseFromParent();
break;
}
}
}
}
return false;
}
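A note on the result: after llvm::finalizeBundle, the ordinary MachineBasicBlock iterator sees one top-level BUNDLE, while instr_iterator still walks the bundled mov/load pair. A minimal inspection sketch (hypothetical helper, assuming the LLVM 3.2-era bundle API):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

// Hypothetical helper: count instructions living inside bundles,
// e.g. the V_MOV / SI_LOAD_LITERAL pairs built by the pass above.
static unsigned countBundledInstrs(llvm::MachineBasicBlock &MBB) {
  unsigned Count = 0;
  for (llvm::MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
       E = MBB.instr_end(); I != E; ++I)
    if (I->isInsideBundle())
      ++Count;
  return Count;
}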

View file

@ -1,19 +0,0 @@
//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "SIMachineFunctionInfo.h"
using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: MachineFunctionInfo(),
SPIPSInputAddr(0),
ShaderType(0)
{ }

View file

@ -1,38 +0,0 @@
//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// SIMachineFunctionInfo is used to keep track of the spi_ps_input_addr config
// register, which tells the hardware which interpolation parameters to
// load.
//
//===----------------------------------------------------------------------===//
#ifndef SIMACHINEFUNCTIONINFO_H_
#define SIMACHINEFUNCTIONINFO_H_
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
class SIMachineFunctionInfo : public MachineFunctionInfo {
public:
SIMachineFunctionInfo(const MachineFunction &MF);
unsigned SPIPSInputAddr;
unsigned ShaderType;
};
} // End namespace llvm
#endif // SIMACHINEFUNCTIONINFO_H_
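As a usage sketch (the helper below is hypothetical, but MachineFunction::getInfo is the standard LLVM accessor for a target's MachineFunctionInfo subclass):

#include "SIMachineFunctionInfo.h"

// Hypothetical helper: read the interpolation config from a pass.
// getInfo<T>() lazily constructs the target's MachineFunctionInfo,
// so no explicit registration is required.
static unsigned readSPIPSInputAddr(llvm::MachineFunction &MF) {
  llvm::SIMachineFunctionInfo *MFI = MF.getInfo<llvm::SIMachineFunctionInfo>();
  return MFI->SPIPSInputAddr;
}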

View file

@ -1,60 +0,0 @@
//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the SI implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
#include "SIRegisterInfo.h"
#include "AMDGPUTargetMachine.h"
using namespace llvm;
SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm,
const TargetInstrInfo &tii)
: AMDGPURegisterInfo(tm, tii),
TM(tm),
TII(tii)
{ }
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const
{
BitVector Reserved(getNumRegs());
return Reserved;
}
unsigned SIRegisterInfo::getBinaryCode(unsigned reg) const
{
switch (reg) {
case AMDGPU::M0: return 124;
case AMDGPU::SREG_LIT_0: return 128;
default: return getHWRegNum(reg);
}
}
const TargetRegisterClass *
SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
{
switch (rc->getID()) {
case AMDGPU::GPRF32RegClassID:
return &AMDGPU::VReg_32RegClass;
default: return rc;
}
}
const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
MVT VT) const
{
switch (VT.SimpleTy) {
default:
case MVT::i32: return &AMDGPU::VReg_32RegClass;
}
}
#include "SIRegisterGetHWRegNum.inc"

View file

@ -1,54 +0,0 @@
//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Interface definition for SIRegisterInfo
//
//===----------------------------------------------------------------------===//
#ifndef SIREGISTERINFO_H_
#define SIREGISTERINFO_H_
#include "AMDGPURegisterInfo.h"
namespace llvm {
class AMDGPUTargetMachine;
class TargetInstrInfo;
struct SIRegisterInfo : public AMDGPURegisterInfo
{
AMDGPUTargetMachine &TM;
const TargetInstrInfo &TII;
SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
/// getBinaryCode - Returns the hardware encoding for a register
virtual unsigned getBinaryCode(unsigned reg) const;
/// getISARegClass - rc is an AMDIL reg class. This function returns the
/// SI register class that is equivalent to the given AMDIL register class.
virtual const TargetRegisterClass *
getISARegClass(const TargetRegisterClass * rc) const;
/// getHWRegNum - Generated function that returns the hardware encoding for
/// a register
unsigned getHWRegNum(unsigned reg) const;
/// getCFGStructurizerRegClass - get the register class of the specified
/// type to use in the CFGStructurizer
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
};
} // End namespace llvm
#endif // SIREGISTERINFO_H_

View file

@ -1,15 +0,0 @@
//===-- SISchedule.td - SI Scheduling definitions ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// TODO: This is just a place holder for now.
//
//===----------------------------------------------------------------------===//
def SI_Itin : ProcessorItineraries <[], [], []>;

View file

@ -1,26 +0,0 @@
//===-- TargetInfo/AMDGPUTargetInfo.cpp - AMDGPU target registration --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Registers the AMDGPU (r600) target with LLVM's TargetRegistry.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
/// The target for the AMDGPU backend
Target llvm::TheAMDGPUTarget;
/// Extern function to initialize the targets for the AMDGPU backend
extern "C" void LLVMInitializeAMDGPUTargetInfo() {
RegisterTarget<Triple::r600, false>
R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
}
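Once registered this way, the backend is visible to TargetRegistry clients by name. A minimal lookup sketch (assumed host-side code; the same call appears in radeon_llvm_emit.cpp below):

#include "llvm/Support/TargetRegistry.h"

// Hedged sketch: fetch the target registered above. lookupTarget
// returns NULL and fills Err if the backend was not linked in.
static const llvm::Target *findR600Target() {
  std::string Err;
  return llvm::TargetRegistry::lookupTarget("r600", Err);
}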

View file

@ -1,35 +0,0 @@
#include "radeon_llvm_emit.h"
#include <llvm/Support/CommandLine.h>
#include <llvm/Support/IRReader.h>
#include <llvm/Support/SourceMgr.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/LLVMContext.h>
#include <llvm/Module.h>
#include <memory>
#include <stdio.h>
#include <llvm-c/Core.h>
using namespace llvm;
static cl::opt<std::string>
InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
static cl::opt<std::string>
TargetGPUName("gpu", cl::desc("target gpu name"), cl::value_desc("gpu_name"));
int main(int argc, char ** argv)
{
unsigned char * bytes;
unsigned byte_count;
std::auto_ptr<Module> M;
LLVMContext &Context = getGlobalContext();
SMDiagnostic Err;
cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n");
M.reset(ParseIRFile(InputFilename, Err, Context));
Module * mod = M.get();
if (!mod) {
// Report the parse error and bail out instead of passing NULL on.
Err.print(argv[0], errs());
return 1;
}
radeon_llvm_compile(wrap(mod), &bytes, &byte_count, TargetGPUName.c_str(), 1);
return 0;
}
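For reference, this standalone driver would be invoked along the lines of loader -gpu <gpu_name> input.ll (the binary name is assumed from the build system; both options are the cl::opt declarations above), which makes it a convenient way to exercise radeon_llvm_compile outside of Mesa.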

View file

@ -39,12 +39,7 @@
#include <llvm/Target/TargetMachine.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm-c/Target.h>
#if HAVE_LLVM < 0x0302
#include <llvm/Target/TargetData.h>
#else
#include <llvm/DataLayout.h>
#endif
#include <iostream>
#include <stdlib.h>
@ -52,16 +47,6 @@
using namespace llvm;
#ifndef EXTERNAL_LLVM
extern "C" {
void LLVMInitializeAMDGPUAsmPrinter(void);
void LLVMInitializeAMDGPUTargetMC(void);
void LLVMInitializeAMDGPUTarget(void);
void LLVMInitializeAMDGPUTargetInfo(void);
}
#endif
namespace {
class LLVMEnsureMultithreaded {
@ -89,17 +74,10 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes,
Triple AMDGPUTriple(sys::getDefaultTargetTriple());
#if HAVE_LLVM == 0x0302
LLVMInitializeAMDGPUTargetInfo();
LLVMInitializeAMDGPUTarget();
LLVMInitializeAMDGPUTargetMC();
LLVMInitializeAMDGPUAsmPrinter();
#else
LLVMInitializeR600TargetInfo();
LLVMInitializeR600Target();
LLVMInitializeR600TargetMC();
LLVMInitializeR600AsmPrinter();
#endif
std::string err;
const Target * AMDGPUTarget = TargetRegistry::lookupTarget("r600", err);
@ -130,11 +108,7 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes,
));
TargetMachine &AMDGPUTargetMachine = *tm.get();
PassManager PM;
#if HAVE_LLVM < 0x0302
PM.add(new TargetData(*AMDGPUTargetMachine.getTargetData()));
#else
PM.add(new DataLayout(*AMDGPUTargetMachine.getDataLayout()));
#endif
PM.add(createPromoteMemoryToRegisterPass());
AMDGPUTargetMachine.setAsmVerbosityDefault(true);